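Fix bug: build the count-filter WHERE clause of the static TF-IDF term-count query correctly when min_df and/or max_df are unset.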
This commit is contained in:
parent
4c2ddc7455
commit
9c6d7429b2
@ -87,9 +87,15 @@ def cosine_similarities_weekly(tfidf_path, outfile, term_colname, included_subre
|
|||||||
subreddit_names = conn.execute(f"SELECT DISTINCT subreddit, subreddit_id from read_parquet('{tfidf_path}/*/*.parquet') ORDER BY subreddit_id;").df()
|
subreddit_names = conn.execute(f"SELECT DISTINCT subreddit, subreddit_id from read_parquet('{tfidf_path}/*/*.parquet') ORDER BY subreddit_id;").df()
|
||||||
|
|
||||||
if static_tfidf_path is not None:
|
if static_tfidf_path is not None:
|
||||||
q = f"SELECT COUNT(DISTINCT({term_colname + '_id'})) as nterms FROM read_parquet('{static_tfidf_path}/*.parquet') WHERE count >= {min_df}"
|
q = f"SELECT COUNT(DISTINCT({term_colname + '_id'})) as nterms FROM read_parquet('{static_tfidf_path}/*.parquet')"
|
||||||
if max_df is not None:
|
if min_df is not None and max_df is not None:
|
||||||
q = q + f"AND count <= {max_df}"
|
q = q + f" WHERE count >= {min_df} AND count <= {max_df}"
|
||||||
|
else:
|
||||||
|
if min_df is not None:
|
||||||
|
q = q + f" WHERE count >= {min_df}"
|
||||||
|
if max_df is not None:
|
||||||
|
q = q + f" WHERE count <= {max_df}"
|
||||||
|
|
||||||
nterms = conn.execute(q).df()
|
nterms = conn.execute(q).df()
|
||||||
else:
|
else:
|
||||||
nterms = conn.execute(f"SELECT MAX({term_colname + '_id'}) as nterms FROM read_parquet('{tfidf_path}/*/*.parquet')").df()
|
nterms = conn.execute(f"SELECT MAX({term_colname + '_id'}) as nterms FROM read_parquet('{tfidf_path}/*/*.parquet')").df()
|
||||||
|
Loading…
Reference in New Issue
Block a user