18
0

Support passing in a list of tfidf vectors.

Also lowercases and strips whitespace from the included subreddit names.
This commit is contained in:
Nate E TeBlunthuis
2021-04-26 11:16:28 -07:00
parent 003a48aea5
commit 806cfc948f
3 changed files with 20 additions and 16 deletions

View File

@@ -60,7 +60,7 @@ def reindex_tfidf(infile, term_colname, min_df=None, max_df=None, included_subre
if included_subreddits is None:
included_subreddits = select_topN_subreddits(topN)
else:
included_subreddits = set(open(included_subreddits))
included_subreddits = set(map(str.strip,map(str.lower,open(included_subreddits))))
if exclude_phrases == True:
tfidf = tfidf.filter(~f.col(term_colname).contains("_"))