1
0
This commit is contained in:
Nathan TeBlunthuis 2024-12-28 20:02:24 -08:00
parent f11d4cfc72
commit 7aa22c7385

View File

@@ -83,7 +83,7 @@ def cosine_similarities_weekly(tfidf_path, outfile, term_colname, included_subre
subreddit_names = conn.execute(f"SELECT DISTINCT subreddit, subreddit_id from read_parquet('{tfidf_path}/*/*.parquet') ORDER BY subreddit_id;").df() subreddit_names = conn.execute(f"SELECT DISTINCT subreddit, subreddit_id from read_parquet('{tfidf_path}/*/*.parquet') ORDER BY subreddit_id;").df()
if static_tfidf_path is not None: if static_tfidf_path is not None:
nterms = conn.execute(f"SELECT MAX({term_colname + '_id'}) as nterms FROM read_parquet('{static_tfidf_path}/*/*.parquet')").df() nterms = conn.execute(f"SELECT MAX({term_colname + '_id'}) as nterms FROM read_parquet('{static_tfidf_path}/*.parquet')").df()
else: else:
nterms = conn.execute(f"SELECT MAX({term_colname + '_id'}) as nterms FROM read_parquet('{tfidf_path}/*/*.parquet')").df() nterms = conn.execute(f"SELECT MAX({term_colname + '_id'}) as nterms FROM read_parquet('{tfidf_path}/*/*.parquet')").df()
nterms = nterms.nterms.values nterms = nterms.nterms.values