diff --git a/similarities/weekly_cosine_similarities.py b/similarities/weekly_cosine_similarities.py index cd2c073..bfd0b25 100755 --- a/similarities/weekly_cosine_similarities.py +++ b/similarities/weekly_cosine_similarities.py @@ -102,43 +102,47 @@ def cosine_similarities_weekly(tfidf_path, outfile, term_colname, included_subre # with Pool(cpu_count()) as pool: # maybe it can be done with 40 cores on the huge machine? -def author_cosine_similarities_weekly(outfile, infile='/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_authors_test.parquet', min_df=2, max_df=None, included_subreddits=None, topN=500): +def author_cosine_similarities_weekly(outfile, infile='/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_authors_test.parquet', min_df=2, max_df=None, included_subreddits=None, topN=500, static_tfidf_path=None): return cosine_similarities_weekly(infile, outfile, 'author', max_df, included_subreddits, topN, - min_df=2 + min_df=2, + static_tfidf_path=static_tfidf_path ) -def term_cosine_similarities_weekly(outfile, infile='/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_terms.parquet', min_df=None, max_df=None, included_subreddits=None, topN=None): +def term_cosine_similarities_weekly(outfile, infile='/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_terms.parquet', min_df=None, max_df=None, included_subreddits=None, topN=None, static_tfidf_path=None): return cosine_similarities_weekly(infile, outfile, 'term', min_df, max_df, included_subreddits, - topN) + topN, + static_tfidf_path=static_tfidf_path) -def author_cosine_similarities_weekly_lsi(outfile, infile = '/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_authors_test.parquet', included_subreddits=None, n_components=100,lsi_model=None): +def author_cosine_similarities_weekly_lsi(outfile, infile = '/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_authors_test.parquet', included_subreddits=None, n_components=100,lsi_model=None,static_tfidf_path=None): return cosine_similarities_weekly_lsi(infile, outfile, 'author', included_subreddits=included_subreddits, n_components=n_components, - lsi_model=lsi_model + lsi_model=lsi_model, + static_tfidf_path=static_tfidf_path ) -def term_cosine_similarities_weekly_lsi(outfile, infile = '/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_terms.parquet', included_subreddits=None, n_components=100,lsi_model=None): +def term_cosine_similarities_weekly_lsi(outfile, infile = '/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_terms.parquet', included_subreddits=None, n_components=100,lsi_model=None,static_tfidf_path=None): return cosine_similarities_weekly_lsi(infile, outfile, 'term', included_subreddits=included_subreddits, n_components=n_components, lsi_model=lsi_model, + static_tfidf_path=static_tfidf_path ) if __name__ == "__main__":