1
0
This commit is contained in:
Nathan TeBlunthuis 2025-01-12 00:49:36 -08:00
parent e568ee6db7
commit 2b4cb7fdf6

View File

@@ -44,11 +44,12 @@ def _week_similarities(week, simfunc, tfidf_path, term_colname, included_subredd
week=week,
rescale_idf=False)
tfidf_colname='tf_idf'
if term_ids is not None:
entries = duckdb.sql(f"SELECT A.{tfidf_colname}, B.{term_id} AS {term_id_new}, A.subreddit_id_new FROM entries AS A JOIN term_ids AS B ON A.{term_id_new} == B.{term_id_old}").df()
tfidf_colname='tf_idf'
# if the max subreddit id we found is less than the number of subreddit names then we have to fill in 0s
shape = (nterms,subreddit_names.shape[0])
print(shape)