1
0

set nterms based on the new database

This commit is contained in:
Nathan TeBlunthuis 2025-01-12 01:03:52 -08:00
parent f79eb28e31
commit a9711fddf5

View File

@ -47,7 +47,8 @@ def _week_similarities(week, simfunc, tfidf_path, term_colname, included_subredd
tfidf_colname='tf_idf'
if term_ids is not None:
entries = duckdb.sql(f"SELECT A.{tfidf_colname}, B.{term_id} AS {term_id_new}, A.subreddit_id_new FROM entries AS A JOIN read_parquet('{term_ids}') AS B ON A.{term_id_new} == B.{'old_'+term_id}").df()
nterms = duckdb.sql(f"SELECT MAX({term_colname}_id AS nterms FROM read_parquet('{term_ids}')").df()
nterms = list(nterms.nterms.values)[0]
# If the max subreddit id we found is less than the number of subreddit names, we have to fill in 0s.