diff --git a/ngrams/term_frequencies.py b/ngrams/term_frequencies.py index e04a2b5..47fc97b 100755 --- a/ngrams/term_frequencies.py +++ b/ngrams/term_frequencies.py @@ -111,7 +111,7 @@ def tf_posts(subreddit_weeks, mwe_pass, mwe_tokenize, stopWords): def weekly_tf(partition, mwe_pass = 'first', input_parquet='/gscratch/comdata/output/reddit_comments_by_subreddit.parquet/', - output_10p_sample_path="/gscratch/comdata/users/nathante/reddit_comment_ngrams_10p_sample/", + output_sample_path="/gscratch/comdata/users/nathante/reddit_comment_ngrams_10p_sample/", temp_output_tfidf_path="/gscratch/comdata/users/nathante/reddit_tfidf_test_authors.parquet_temp/", output_terms_path="/gscratch/comdata/output/reddit_ngrams/comment_terms.parquet", output_authors_path="/gscratch/comdata/output/reddit_ngrams/comment_authors.parquet", @@ -237,7 +237,7 @@ def gen_task_list(mwe_pass='first', for f in files: if f.endswith(".parquet"): - outfile.write(f"{curdir}/term_frequencies.py weekly_tf {f} --mwe-pass {mwe_pass} --input-parquet {input_parquet} --output-01p-sample-path {output_10p_sample_path} --temp-output-tfidf-path {temp_output_tfidf_path} --output-terms-path {output_terms_path} --output-authors-path {output_terms_path} --reddit-dataset {dataset}\n") + outfile.write(f"{curdir}/term_frequencies.py weekly_tf {f} --mwe-pass {mwe_pass} --input-parquet {input_parquet} --output-sample-path {output_10p_sample_path} --temp-output-tfidf-path {temp_output_tfidf_path} --output-terms-path {output_terms_path} --output-authors-path {output_terms_path} --reddit-dataset {dataset}\n") if __name__ == "__main__": fire.Fire({"gen_task_list":gen_task_list,