Merge branch 'excise_reindex' of code:cdsc_reddit into excise_reindex
This commit is contained in:
		
						commit
						7b14db67de
					
				
							
								
								
									
										2
									
								
								__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								__init__.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,2 @@ | |||||||
|  | from .timeseries import load_clusters, load_densities, build_cluster_timeseries | ||||||
|  | 
 | ||||||
							
								
								
									
										2
									
								
								timeseries/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								timeseries/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,2 @@ | |||||||
|  | from .choose_clusters import load_clusters, load_densities | ||||||
|  | from .cluster_timeseries import build_cluster_timeseries | ||||||
| @ -2,11 +2,11 @@ import pandas as pd | |||||||
| import numpy as np | import numpy as np | ||||||
| from pyspark.sql import functions as f | from pyspark.sql import functions as f | ||||||
| from pyspark.sql import SparkSession | from pyspark.sql import SparkSession | ||||||
| from choose_clusters import load_clusters, load_densities | from .choose_clusters import load_clusters, load_densities | ||||||
| import fire | import fire | ||||||
| from pathlib import Path | from pathlib import Path | ||||||
| 
 | 
 | ||||||
| def main(term_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_terms_10000.feather", | def build_cluster_timeseries(term_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_terms_10000.feather", | ||||||
|          author_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_authors_10000.feather", |          author_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_authors_10000.feather", | ||||||
|          term_densities_path="/gscratch/comdata/output/reddit_density/comment_terms_10000.feather", |          term_densities_path="/gscratch/comdata/output/reddit_density/comment_terms_10000.feather", | ||||||
|          author_densities_path="/gscratch/comdata/output/reddit_density/comment_authors_10000.feather", |          author_densities_path="/gscratch/comdata/output/reddit_density/comment_authors_10000.feather", | ||||||
| @ -34,4 +34,4 @@ def main(term_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_ | |||||||
|     ts.write.parquet(output, mode='overwrite') |     ts.write.parquet(output, mode='overwrite') | ||||||
| 
 | 
 | ||||||
| if __name__ == "__main__": | if __name__ == "__main__": | ||||||
|     fire.Fire(main) |     fire.Fire(build_cluster_timeseries) | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user