From 2b4cb7fdf6e3922cbb87f2779422fccaa8870931 Mon Sep 17 00:00:00 2001 From: Nathan TeBlunthuis Date: Sun, 12 Jan 2025 00:49:36 -0800 Subject: [PATCH] bugfix --- similarities/weekly_cosine_similarities.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/similarities/weekly_cosine_similarities.py b/similarities/weekly_cosine_similarities.py index a8f39d7..6ad5b8e 100755 --- a/similarities/weekly_cosine_similarities.py +++ b/similarities/weekly_cosine_similarities.py @@ -44,11 +44,12 @@ def _week_similarities(week, simfunc, tfidf_path, term_colname, included_subredd week=week, rescale_idf=False) + tfidf_colname='tf_idf' if term_ids is not None: entries = duckdb.sql(f"SELECT A.{tfidf_colname}, B.{term_id} AS {term_id_new}, A.subreddit_id_new FROM entries AS A JOIN term_ids AS B ON A.{term_id_new} == B.{term_id_old}").df() - tfidf_colname='tf_idf' + # if the max subreddit id we found is less than the number of subreddit names then we have to fill in 0s shape = (nterms,subreddit_names.shape[0]) print(shape)