1
0

Pass the input path into the tfidf function as a parameter.

This commit is contained in:
Nathan TeBlunthuis 2024-12-02 08:03:19 -08:00
parent 5a131053af
commit 355d014d5f

View File

@ -27,11 +27,24 @@ def tfidf(inpath, outpath, topN, term_colname, exclude, included_subreddits):
def tfidf_weekly(inpath, outpath, topN, term_colname, exclude, included_subreddits):
    """Delegate to ``_tfidf_wrapper`` using ``build_weekly_tfidf_dataset``.

    All six arguments are forwarded positionally, unchanged and in the same
    order, after ``build_weekly_tfidf_dataset`` (passed as the first
    argument). The return value is whatever ``_tfidf_wrapper`` returns.
    """
    return _tfidf_wrapper(
        build_weekly_tfidf_dataset,
        inpath,
        outpath,
        topN,
        term_colname,
        exclude,
        included_subreddits,
    )
def tfidf_authors(outpath='/gscratch/comdata/output/reddit_similarity/tfidf/comment_authors.parquet',
def tfidf_post_comment_authors(outpath='/gscratch/comdata/output/reddit_similarity/tfidf/post_authors.parquet',
topN=25000,
included_subreddits=None):
return tfidf("/gscratch/comdata/output/reddit_ngrams/comment_authors.parquet",
return tfidf("/gscratch/comdata/output/reddit_ngrams/post_comment_authors.parquet",
outpath,
topN,
'author',
['[deleted]','AutoModerator'],
included_subreddits=included_subreddits
)
def tfidf_authors(inpath="/gscratch/comdata/output/reddit_ngrams/comment_authors.parquet",
outpath='/gscratch/comdata/output/reddit_similarity/tfidf/comment_authors.parquet',
topN=25000,
included_subreddits=None):
return tfidf(inpath,
outpath,
topN,
'author',
@ -63,6 +76,18 @@ def tfidf_authors_weekly(outpath='/gscratch/comdata/output/reddit_similarity/tfi
included_subreddits=included_subreddits
)
def tfidf_post_comment_authors_weekly(outpath='/gscratch/comdata/output/reddit_similarity/tfidf_weekly/post_comment_authors.parquet',
                                      topN=25000,
                                      included_subreddits=None):
    """Call ``tfidf_weekly`` on the post/comment authors ngrams parquet.

    The input path is fixed; ``'author'`` is passed as the term column and
    ``'[deleted]'`` and ``'AutoModerator'`` are passed as the exclude list.

    Args:
        outpath: Forwarded to ``tfidf_weekly`` as its output path.
        topN: Forwarded to ``tfidf_weekly`` unchanged.
        included_subreddits: Forwarded to ``tfidf_weekly`` by keyword.

    Returns:
        Whatever ``tfidf_weekly`` returns.
    """
    ngrams_path = "/gscratch/comdata/output/reddit_ngrams/post_comment_authors.parquet"
    excluded_authors = ['[deleted]', 'AutoModerator']
    return tfidf_weekly(
        ngrams_path,
        outpath,
        topN,
        'author',
        excluded_authors,
        included_subreddits=included_subreddits,
    )
def tfidf_terms_weekly(outpath='/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_terms.parquet',
topN=25000,
included_subreddits=None):