1
0
This commit is contained in:
Nathan TeBlunthuis 2025-01-12 01:17:21 -08:00
parent 3792f58d15
commit 9ae2d13573

View File

@ -48,7 +48,7 @@ def _week_similarities(week, simfunc, tfidf_path, term_colname, included_subredd
if term_ids is not None: if term_ids is not None:
print("reassigning {term_id}s") print("reassigning {term_id}s")
entries = duckdb.sql(f"SELECT A.{tfidf_colname}, B.{term_id} AS {term_id_new}, A.subreddit_id_new FROM entries AS A JOIN read_parquet('{term_ids}') AS B ON A.{term_id_new} == B.{'old_'+term_id}").df() entries = duckdb.sql(f"SELECT A.{tfidf_colname}, B.{term_id} AS {term_id_new}, A.subreddit_id_new FROM entries AS A JOIN read_parquet('{term_ids}') AS B ON A.{term_id_new} == B.{'old_'+term_id}").df()
nterms = duckdb.sql(f"SELECT MAX({term_colname}+'_id') AS nterms FROM read_parquet('{term_ids}')").df() nterms = duckdb.sql(f"SELECT MAX({term_colname+'_id'}) AS nterms FROM read_parquet('{term_ids}')").df()
nterms = list(nterms.nterms.values)[0] nterms = list(nterms.nterms.values)[0]