1
0
This commit is contained in:
Nathan TeBlunthuis 2024-12-31 14:41:27 -08:00
parent c59d251d19
commit 81e12d1cef

View File

@@ -30,7 +30,7 @@ def build_cluster_timeseries(term_clusters_path="/gscratch/comdata/output/reddit
clusters = load_clusters(term_clusters_path, author_clusters_path)
clusters.to_parquet("/tmp/clusters.parquet")
-clusters = spark.read.parquet("/tmp/clusters.parquet")
+spk_clusters = spark.read.parquet("/tmp/clusters.parquet")
ts = ts.join(spk_clusters, on='subreddit', how='inner')
ts.write.parquet(output, mode='overwrite')