add script for pulling cluster timeseries
This commit is contained in:
		
							parent
							
								
									a013f6718b
								
							
						
					
					
						commit
						47ba04aa97
					
				| @ -2,11 +2,11 @@ import pandas as pd | ||||
| import numpy as np | ||||
| from pyspark.sql import functions as f | ||||
| from pyspark.sql import SparkSession | ||||
| from choose_clusters import load_clusters, load_densities | ||||
| from .choose_clusters import load_clusters, load_densities | ||||
| import fire | ||||
| from pathlib import Path | ||||
| 
 | ||||
| def main(term_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_terms_10000.feather", | ||||
| def build_cluster_timeseries(term_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_terms_10000.feather", | ||||
|          author_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_authors_10000.feather", | ||||
|          term_densities_path="/gscratch/comdata/output/reddit_density/comment_terms_10000.feather", | ||||
|          author_densities_path="/gscratch/comdata/output/reddit_density/comment_authors_10000.feather", | ||||
| @ -34,4 +34,4 @@ def main(term_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_ | ||||
|     ts.write.parquet(output, mode='overwrite') | ||||
| 
 | ||||
| if __name__ == "__main__": | ||||
|     fire.Fire(main) | ||||
|     fire.Fire(build_cluster_timeseries) | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user