pass path into tfidf function.
This commit is contained in:
parent
5a131053af
commit
355d014d5f
@ -27,11 +27,24 @@ def tfidf(inpath, outpath, topN, term_colname, exclude, included_subreddits):
|
|||||||
def tfidf_weekly(inpath, outpath, topN, term_colname, exclude, included_subreddits):
|
def tfidf_weekly(inpath, outpath, topN, term_colname, exclude, included_subreddits):
|
||||||
return _tfidf_wrapper(build_weekly_tfidf_dataset, inpath, outpath, topN, term_colname, exclude, included_subreddits)
|
return _tfidf_wrapper(build_weekly_tfidf_dataset, inpath, outpath, topN, term_colname, exclude, included_subreddits)
|
||||||
|
|
||||||
def tfidf_authors(outpath='/gscratch/comdata/output/reddit_similarity/tfidf/comment_authors.parquet',
|
def tfidf_post_comment_authors(outpath='/gscratch/comdata/output/reddit_similarity/tfidf/post_authors.parquet',
|
||||||
topN=25000,
|
topN=25000,
|
||||||
included_subreddits=None):
|
included_subreddits=None):
|
||||||
|
|
||||||
return tfidf("/gscratch/comdata/output/reddit_ngrams/comment_authors.parquet",
|
return tfidf("/gscratch/comdata/output/reddit_ngrams/post_comment_authors.parquet",
|
||||||
|
outpath,
|
||||||
|
topN,
|
||||||
|
'author',
|
||||||
|
['[deleted]','AutoModerator'],
|
||||||
|
included_subreddits=included_subreddits
|
||||||
|
)
|
||||||
|
|
||||||
|
def tfidf_authors(inpath="/gscratch/comdata/output/reddit_ngrams/comment_authors.parquet",
|
||||||
|
outpath='/gscratch/comdata/output/reddit_similarity/tfidf/comment_authors.parquet',
|
||||||
|
topN=25000,
|
||||||
|
included_subreddits=None):
|
||||||
|
|
||||||
|
return tfidf(inpath,
|
||||||
outpath,
|
outpath,
|
||||||
topN,
|
topN,
|
||||||
'author',
|
'author',
|
||||||
@ -63,6 +76,18 @@ def tfidf_authors_weekly(outpath='/gscratch/comdata/output/reddit_similarity/tfi
|
|||||||
included_subreddits=included_subreddits
|
included_subreddits=included_subreddits
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def tfidf_post_comment_authors_weekly(outpath='/gscratch/comdata/output/reddit_similarity/tfidf_weekly/post_comment_authors.parquet',
|
||||||
|
topN=25000,
|
||||||
|
included_subreddits=None):
|
||||||
|
|
||||||
|
return tfidf_weekly("/gscratch/comdata/output/reddit_ngrams/post_comment_authors.parquet",
|
||||||
|
outpath,
|
||||||
|
topN,
|
||||||
|
'author',
|
||||||
|
['[deleted]','AutoModerator'],
|
||||||
|
included_subreddits=included_subreddits
|
||||||
|
)
|
||||||
|
|
||||||
def tfidf_terms_weekly(outpath='/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_terms.parquet',
|
def tfidf_terms_weekly(outpath='/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_terms.parquet',
|
||||||
topN=25000,
|
topN=25000,
|
||||||
included_subreddits=None):
|
included_subreddits=None):
|
||||||
|
Loading…
Reference in New Issue
Block a user