1
0

Handle the case where we're in a parent directory.

This commit is contained in:
Nathan TeBlunthuis 2024-11-27 18:49:03 -08:00
parent a84b633641
commit a0a6a08bf2

View File

@ -222,11 +222,16 @@ def gen_task_list(mwe_pass='first',
dataset='comments'):
files = os.listdir(input_parquet)
with open("tf_task_list",'w') as outfile:
curdir = Path('.')
if curdir.absolute().name == 'cdsc_reddit':
curdir = str(curdir.absolute())
else:
curdir = str(curdir.absolute() / "cdsc_reddit")
with open("tf_task_list",'w') as outfile:
for f in files:
if f.endswith(".parquet"):
outfile.write(f"{str(curdir.absolute())}/term_frequencies.py weekly_tf {f} --mwe-pass {mwe_pass} --input-parquet {input_parquet} --output-01p-sample-path {output_10p_sample_path} --temp-output-tfidf-path {temp_output_tfidf_path} --output-terms-path {output_terms_path} --output-authors-path {output_terms_path} --dataset {dataset}\n")
outfile.write(f"{curdir}/term_frequencies.py weekly_tf {f} --mwe-pass {mwe_pass} --input-parquet {input_parquet} --output-01p-sample-path {output_10p_sample_path} --temp-output-tfidf-path {temp_output_tfidf_path} --output-terms-path {output_terms_path} --output-authors-path {output_terms_path} --dataset {dataset}\n")
if __name__ == "__main__":
fire.Fire({"gen_task_list":gen_task_list,