From a0a6a08bf246b525480a35c957d8f7d90b5d9cfa Mon Sep 17 00:00:00 2001 From: Nathan TeBlunthuis Date: Wed, 27 Nov 2024 18:49:03 -0800 Subject: [PATCH] handle case where we're in a parent directory. --- ngrams/term_frequencies.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/ngrams/term_frequencies.py b/ngrams/term_frequencies.py index d0eb82d..b687192 100755 --- a/ngrams/term_frequencies.py +++ b/ngrams/term_frequencies.py @@ -222,11 +222,16 @@ def gen_task_list(mwe_pass='first', dataset='comments'): files = os.listdir(input_parquet) + curdir = Path('.') + if curdir.absolute().name == 'cdsc_reddit': + curdir = str(curdir.absolute()) + else: + curdir = str(curdir.absolute() / "cdsc_reddit") with open("tf_task_list",'w') as outfile: - curdir = Path('.') + for f in files: if f.endswith(".parquet"): - outfile.write(f"{str(curdir.absolute())}/term_frequencies.py weekly_tf {f} --mwe-pass {mwe_pass} --input-parquet {input_parquet} --output-01p-sample-path {output_10p_sample_path} --temp-output-tfidf-path {temp_output_tfidf_path} --output-terms-path {output_terms_path} --output-authors-path {output_terms_path} --dataset {dataset}\n") + outfile.write(f"{curdir}/term_frequencies.py weekly_tf {f} --mwe-pass {mwe_pass} --input-parquet {input_parquet} --output-01p-sample-path {output_10p_sample_path} --temp-output-tfidf-path {temp_output_tfidf_path} --output-terms-path {output_terms_path} --output-authors-path {output_terms_path} --dataset {dataset}\n") if __name__ == "__main__": fire.Fire({"gen_task_list":gen_task_list,