1
0
This commit is contained in:
Nathan TeBlunthuis 2024-11-27 19:53:04 -08:00
parent 0d7f4d3cec
commit 2e5181602b

View File

@@ -111,7 +111,7 @@ def tf_posts(subreddit_weeks, mwe_pass, mwe_tokenize, stopWords):
def weekly_tf(partition, def weekly_tf(partition,
mwe_pass = 'first', mwe_pass = 'first',
input_parquet='/gscratch/comdata/output/reddit_comments_by_subreddit.parquet/', input_parquet='/gscratch/comdata/output/reddit_comments_by_subreddit.parquet/',
output_10p_sample_path="/gscratch/comdata/users/nathante/reddit_comment_ngrams_10p_sample/", output_sample_path="/gscratch/comdata/users/nathante/reddit_comment_ngrams_10p_sample/",
temp_output_tfidf_path="/gscratch/comdata/users/nathante/reddit_tfidf_test_authors.parquet_temp/", temp_output_tfidf_path="/gscratch/comdata/users/nathante/reddit_tfidf_test_authors.parquet_temp/",
output_terms_path="/gscratch/comdata/output/reddit_ngrams/comment_terms.parquet", output_terms_path="/gscratch/comdata/output/reddit_ngrams/comment_terms.parquet",
output_authors_path="/gscratch/comdata/output/reddit_ngrams/comment_authors.parquet", output_authors_path="/gscratch/comdata/output/reddit_ngrams/comment_authors.parquet",
@@ -237,7 +237,7 @@ def gen_task_list(mwe_pass='first',
for f in files: for f in files:
if f.endswith(".parquet"): if f.endswith(".parquet"):
outfile.write(f"{curdir}/term_frequencies.py weekly_tf {f} --mwe-pass {mwe_pass} --input-parquet {input_parquet} --output-01p-sample-path {output_10p_sample_path} --temp-output-tfidf-path {temp_output_tfidf_path} --output-terms-path {output_terms_path} --output-authors-path {output_terms_path} --reddit-dataset {dataset}\n") outfile.write(f"{curdir}/term_frequencies.py weekly_tf {f} --mwe-pass {mwe_pass} --input-parquet {input_parquet} --output-sample-path {output_10p_sample_path} --temp-output-tfidf-path {temp_output_tfidf_path} --output-terms-path {output_terms_path} --output-authors-path {output_terms_path} --reddit-dataset {dataset}\n")
if __name__ == "__main__": if __name__ == "__main__":
fire.Fire({"gen_task_list":gen_task_list, fire.Fire({"gen_task_list":gen_task_list,