From 81e12d1cefd030ff3015c24a6e8abb5e462059b9 Mon Sep 17 00:00:00 2001 From: Nathan TeBlunthuis Date: Tue, 31 Dec 2024 14:41:27 -0800 Subject: [PATCH] bugfix. --- timeseries/cluster_timeseries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/timeseries/cluster_timeseries.py b/timeseries/cluster_timeseries.py index 0d0eefa..7ffcf0f 100644 --- a/timeseries/cluster_timeseries.py +++ b/timeseries/cluster_timeseries.py @@ -30,7 +30,7 @@ def build_cluster_timeseries(term_clusters_path="/gscratch/comdata/output/reddit clusters = load_clusters(term_clusters_path, author_clusters_path) clusters.to_parquet("/tmp/clusters.parquet") - clusters = spark.read.parquet("/tmp/clusters.parquet") + spk_clusters = spark.read.parquet("/tmp/clusters.parquet") ts = ts.join(spk_clusters, on='subreddit', how='inner') ts.write.parquet(output, mode='overwrite')