diff --git a/similarities/tfidf.py b/similarities/tfidf.py index 1467c3e..6695c57 100755 --- a/similarities/tfidf.py +++ b/similarities/tfidf.py @@ -65,23 +65,12 @@ def tfidf_terms(outpath='/gscratch/comdata/output/reddit_similarity/tfidf/commen included_subreddits=included_subreddits ) -def tfidf_authors_weekly(outpath='/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_authors.parquet', +def tfidf_authors_weekly(inpath="/gscratch/comdata/output/reddit_ngrams/comment_authors.parquet", + outpath='/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_authors.parquet', topN=25000, included_subreddits=None): - return tfidf_weekly("/gscratch/comdata/output/reddit_ngrams/comment_authors.parquet", - outpath, - topN, - 'author', - ['[deleted]','AutoModerator'], - included_subreddits=included_subreddits - ) - -def tfidf_post_comment_authors_weekly(outpath='/gscratch/comdata/output/reddit_similarity/tfidf_weekly/post_comment_authors.parquet', - topN=25000, - included_subreddits=None): - - return tfidf_weekly("/gscratch/comdata/output/reddit_ngrams/post_comment_authors.parquet", + return tfidf_weekly(inpath, outpath, topN, 'author',