From a5ca25dd6eb03a08a7d51e72fe23a5b31363a175 Mon Sep 17 00:00:00 2001 From: Nathan TeBlunthuis Date: Wed, 27 Nov 2024 19:56:06 -0800 Subject: [PATCH] bugfix. --- ngrams/term_frequencies.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ngrams/term_frequencies.py b/ngrams/term_frequencies.py index 47fc97b..30e1336 100755 --- a/ngrams/term_frequencies.py +++ b/ngrams/term_frequencies.py @@ -111,7 +111,7 @@ def tf_posts(subreddit_weeks, mwe_pass, mwe_tokenize, stopWords): def weekly_tf(partition, mwe_pass = 'first', input_parquet='/gscratch/comdata/output/reddit_comments_by_subreddit.parquet/', - output_sample_path="/gscratch/comdata/users/nathante/reddit_comment_ngrams_10p_sample/", + output_10p_sample_path="/gscratch/comdata/users/nathante/reddit_comment_ngrams_10p_sample/", temp_output_tfidf_path="/gscratch/comdata/users/nathante/reddit_tfidf_test_authors.parquet_temp/", output_terms_path="/gscratch/comdata/output/reddit_ngrams/comment_terms.parquet", output_authors_path="/gscratch/comdata/output/reddit_ngrams/comment_authors.parquet", @@ -237,7 +237,7 @@ def gen_task_list(mwe_pass='first', for f in files: if f.endswith(".parquet"): - outfile.write(f"{curdir}/term_frequencies.py weekly_tf {f} --mwe-pass {mwe_pass} --input-parquet {input_parquet} --output-sample-path {output_10p_sample_path} --temp-output-tfidf-path {temp_output_tfidf_path} --output-terms-path {output_terms_path} --output-authors-path {output_terms_path} --reddit-dataset {dataset}\n") + outfile.write(f"{curdir}/term_frequencies.py weekly_tf {f} --mwe-pass {mwe_pass} --input-parquet {input_parquet} --output-10p_sample-path {output_10p_sample_path} --temp-output-tfidf-path {temp_output_tfidf_path} --output-terms-path {output_terms_path} --output-authors-path {output_terms_path} --reddit-dataset {dataset}\n") if __name__ == "__main__": fire.Fire({"gen_task_list":gen_task_list,