From e2e7d7dbb1712fd849ef44d40b7a678d9aa7607e Mon Sep 17 00:00:00 2001 From: Nathan TeBlunthuis Date: Sat, 28 Dec 2024 19:27:42 -0800 Subject: [PATCH] more print debugging --- similarities/weekly_cosine_similarities.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/similarities/weekly_cosine_similarities.py b/similarities/weekly_cosine_similarities.py index af545f3..cd2c073 100755 --- a/similarities/weekly_cosine_similarities.py +++ b/similarities/weekly_cosine_similarities.py @@ -84,13 +84,14 @@ def cosine_similarities_weekly(tfidf_path, outfile, term_colname, included_subre nterms = conn.execute(f"SELECT MAX({term_colname + '_id'}) as nterms FROM read_parquet('{tfidf_path}/*/*.parquet')").df() nterms = nterms.nterms.values - + print(nterms) + print(int(nterms[0])) weeks = conn.execute(f"SELECT DISTINCT week FROM read_parquet('{tfidf_path}/*/*.parquet')").df() weeks = weeks.week.values conn.close() print(f"computing weekly similarities") - week_similarities_helper = partial(_week_similarities,simfunc=simfunc, tfidf_path=tfidf_path, term_colname=term_colname, outdir=outfile, min_df=min_df, max_df=max_df, included_subreddits=included_subreddits, topN=None, subreddit_names=subreddit_names,nterms=int(nterms[0])) + week_similarities_helper = partial(_week_similarities,simfunc=simfunc, tfidf_path=tfidf_path, term_colname=term_colname, outdir=outfile, min_df=min_df, max_df=max_df, included_subreddits=included_subreddits, topN=None, subreddit_names=subreddit_names,nterms=nterms) for week in weeks: week_similarities_helper(week)