diff --git a/timeseries/cluster_timeseries.py b/timeseries/cluster_timeseries.py index 3b47474..a58c4f8 100644 --- a/timeseries/cluster_timeseries.py +++ b/timeseries/cluster_timeseries.py @@ -18,8 +18,8 @@ def build_cluster_timeseries(term_clusters_path="/gscratch/comdata/output/reddit df_submissions = spark.read.parquet("/gscratch/comdata/output/reddit_submissions_by_subreddit.parquet") df_comments = df_comments.select(['subreddit','CreatedAt','author']) - df_submisisons = df_comments.select(['subreddit','CreatedAt','author']) - df = df_comments.union(df_submisisons) + df_submissions = df_submissions.select(['subreddit','CreatedAt','author']) + df = df_comments.union(df_submissions) df = df.withColumn('week', f.date_trunc('week', f.col("CreatedAt")))