Merge branch 'excise_reindex' of code:cdsc_reddit into excise_reindex
This commit is contained in:
		
						commit
						7b14db67de
					
				
							
								
								
									
										2
									
								
								__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								__init__.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,2 @@ | ||||
| from .timeseries import load_clusters, load_densities, build_cluster_timeseries | ||||
| 
 | ||||
							
								
								
									
										2
									
								
								timeseries/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								timeseries/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,2 @@ | ||||
| from .choose_clusters import load_clusters, load_densities | ||||
| from .cluster_timeseries import build_cluster_timeseries | ||||
| @ -2,11 +2,11 @@ import pandas as pd | ||||
| import numpy as np | ||||
| from pyspark.sql import functions as f | ||||
| from pyspark.sql import SparkSession | ||||
| from choose_clusters import load_clusters, load_densities | ||||
| from .choose_clusters import load_clusters, load_densities | ||||
| import fire | ||||
| from pathlib import Path | ||||
| 
 | ||||
| def main(term_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_terms_10000.feather", | ||||
| def build_cluster_timeseries(term_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_terms_10000.feather", | ||||
|          author_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_authors_10000.feather", | ||||
|          term_densities_path="/gscratch/comdata/output/reddit_density/comment_terms_10000.feather", | ||||
|          author_densities_path="/gscratch/comdata/output/reddit_density/comment_authors_10000.feather", | ||||
| @ -34,4 +34,4 @@ def main(term_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_ | ||||
|     ts.write.parquet(output, mode='overwrite') | ||||
| 
 | ||||
| if __name__ == "__main__": | ||||
|     fire.Fire(main) | ||||
|     fire.Fire(build_cluster_timeseries) | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user