use static tfidf (not weekly) to create tfidf matrix
This commit is contained in:
parent
e2e7d7dbb1
commit
7b5ac73b2c
@ -102,43 +102,47 @@ def cosine_similarities_weekly(tfidf_path, outfile, term_colname, included_subre
|
||||
# with Pool(cpu_count()) as pool: # maybe it can be done with 40 cores on the huge machine?
|
||||
|
||||
|
||||
def author_cosine_similarities_weekly(outfile, infile='/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_authors_test.parquet', min_df=2, max_df=None, included_subreddits=None, topN=500, static_tfidf_path=None):
    """Run cosine_similarities_weekly with 'author' as the term column.

    Args:
        outfile: passed through as the second positional argument (outfile).
        infile: passed as the first positional argument (tfidf_path).
        min_df: forwarded as the ``min_df`` keyword. Previously the literal 2
            was always passed, so a caller-supplied value was ignored.
        max_df: forwarded positionally (4th argument).
        included_subreddits: forwarded positionally (5th argument).
        topN: forwarded positionally (6th argument).
        static_tfidf_path: forwarded as the keyword of the same name.

    Returns:
        Whatever cosine_similarities_weekly returns.
    """
    # NOTE(review): the positional order here (max_df, included_subreddits,
    # topN) differs from term_cosine_similarities_weekly, which passes
    # (min_df, max_df, included_subreddits, topN). Confirm both against the
    # full signature of cosine_similarities_weekly; passing these by keyword
    # would remove the ambiguity.
    return cosine_similarities_weekly(infile,
                                      outfile,
                                      'author',
                                      max_df,
                                      included_subreddits,
                                      topN,
                                      min_df=min_df,
                                      static_tfidf_path=static_tfidf_path
                                      )
|
||||
|
||||
def term_cosine_similarities_weekly(outfile, infile='/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_terms.parquet', min_df=None, max_df=None, included_subreddits=None, topN=None, static_tfidf_path=None):
    """Run cosine_similarities_weekly with 'term' as the term column.

    ``infile`` and ``outfile`` are passed as the first two positional
    arguments, followed by 'term', then ``min_df``, ``max_df``,
    ``included_subreddits`` and ``topN`` in that order.
    ``static_tfidf_path`` is forwarded by keyword. The callee's result is
    returned unchanged.
    """
    # Positional arguments, in exactly the order the callee receives them.
    # NOTE(review): confirm this order against the full signature of
    # cosine_similarities_weekly, which is not visible from here.
    positional_args = (
        infile,
        outfile,
        'term',
        min_df,
        max_df,
        included_subreddits,
        topN,
    )
    return cosine_similarities_weekly(*positional_args,
                                      static_tfidf_path=static_tfidf_path)
|
||||
|
||||
|
||||
def author_cosine_similarities_weekly_lsi(outfile, infile = '/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_authors_test.parquet', included_subreddits=None, n_components=100,lsi_model=None,static_tfidf_path=None):
    """Run cosine_similarities_weekly_lsi with 'author' as the third argument.

    ``infile``, ``outfile`` and the literal 'author' are passed positionally;
    ``included_subreddits``, ``n_components``, ``lsi_model`` and
    ``static_tfidf_path`` are forwarded by keyword under the same names.
    The callee's result is returned unchanged.
    """
    # Keyword options forwarded verbatim to the LSI similarity routine.
    forwarded = {
        'included_subreddits': included_subreddits,
        'n_components': n_components,
        'lsi_model': lsi_model,
        'static_tfidf_path': static_tfidf_path,
    }
    return cosine_similarities_weekly_lsi(infile, outfile, 'author', **forwarded)
|
||||
|
||||
|
||||
def term_cosine_similarities_weekly_lsi(outfile, infile = '/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_terms.parquet', included_subreddits=None, n_components=100,lsi_model=None,static_tfidf_path=None):
    """Run cosine_similarities_weekly_lsi with 'term' as the third argument.

    ``infile``, ``outfile`` and the literal 'term' are passed positionally;
    ``included_subreddits``, ``n_components``, ``lsi_model`` and
    ``static_tfidf_path`` are forwarded by keyword under the same names.
    The callee's result is returned unchanged.
    """
    # Keyword options forwarded verbatim to the LSI similarity routine.
    forwarded = {
        'included_subreddits': included_subreddits,
        'n_components': n_components,
        'lsi_model': lsi_model,
        'static_tfidf_path': static_tfidf_path,
    }
    return cosine_similarities_weekly_lsi(infile, outfile, 'term', **forwarded)
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
Loading…
Reference in New Issue
Block a user