diff --git a/ngrams/term_frequencies.py b/ngrams/term_frequencies.py index 3597a41..d0eb82d 100755 --- a/ngrams/term_frequencies.py +++ b/ngrams/term_frequencies.py @@ -13,6 +13,7 @@ from nltk.corpus import stopwords from nltk.util import ngrams import string from random import random +from pathlib import Path # remove urls # taken from https://stackoverflow.com/questions/3809401/what-is-a-good-regular-expression-to-match-a-url @@ -222,9 +223,10 @@ def gen_task_list(mwe_pass='first', files = os.listdir(input_parquet) with open("tf_task_list",'w') as outfile: + curdir = Path('.') for f in files: if f.endswith(".parquet"): - outfile.write(f"./term_frequencies.py weekly_tf {f} --mwe-pass {mwe_pass} --input-parquet {input_parquet} --output-01p-sample-path {output_10p_sample_path} --temp-output-tfidf-path {temp_output_tfidf_path} --output-terms-path {output_terms_path} --output-authors-path {output_terms_path} --dataset {dataset}\n") + outfile.write(f"{str(curdir.absolute())}/term_frequencies.py weekly_tf {f} --mwe-pass {mwe_pass} --input-parquet {input_parquet} --output-01p-sample-path {output_10p_sample_path} --temp-output-tfidf-path {temp_output_tfidf_path} --output-terms-path {output_terms_path} --output-authors-path {output_terms_path} --dataset {dataset}\n") if __name__ == "__main__": fire.Fire({"gen_task_list":gen_task_list,