
add script for pulling cluster timeseries

Nate E TeBlunthuis 2021-05-10 18:24:22 -07:00
parent a013f6718b
commit 47ba04aa97


@@ -2,11 +2,11 @@ import pandas as pd
 import numpy as np
 from pyspark.sql import functions as f
 from pyspark.sql import SparkSession
-from choose_clusters import load_clusters, load_densities
+from .choose_clusters import load_clusters, load_densities
 import fire
 from pathlib import Path
 
-def main(term_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_terms_10000.feather",
+def build_cluster_timeseries(term_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_terms_10000.feather",
          author_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_authors_10000.feather",
          term_densities_path="/gscratch/comdata/output/reddit_density/comment_terms_10000.feather",
          author_densities_path="/gscratch/comdata/output/reddit_density/comment_authors_10000.feather",
@@ -34,4 +34,4 @@ def main(term_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_
     ts.write.parquet(output, mode='overwrite')
 
 if __name__ == "__main__":
-    fire.Fire(main)
+    fire.Fire(build_cluster_timeseries)
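
Since fire.Fire(build_cluster_timeseries) turns the function's keyword arguments into a command-line interface, the renamed entry point can be driven either from Python or from the shell. The sketch below is illustrative only: the package and module names (timeseries.cluster_timeseries) are assumptions not shown in this diff; only the function name and the default paths come from the change above.

# Minimal usage sketch; module path timeseries.cluster_timeseries is assumed.
from timeseries.cluster_timeseries import build_cluster_timeseries

# Calling with no arguments uses the /gscratch/comdata/... defaults
# shown in the function signature above.
build_cluster_timeseries()

# Because fire.Fire() maps keyword arguments to command-line flags,
# a roughly equivalent shell invocation would be:
#   python3 -m timeseries.cluster_timeseries \
#       --term_clusters_path=/gscratch/comdata/output/reddit_clustering/comment_terms_10000.feather
# The -m (module) form matters now that the script uses the relative
# import `from .choose_clusters import ...`, which fails when the file
# is executed directly as a standalone script.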