1
0

Use an absolute path for the term_frequencies.py call written to the task list.

This commit is contained in:
Nathan TeBlunthuis 2024-11-27 18:42:29 -08:00
parent ce7b5f92eb
commit a84b633641

View File

@ -13,6 +13,7 @@ from nltk.corpus import stopwords
from nltk.util import ngrams
import string
from random import random
from pathlib import Path
# remove urls
# taken from https://stackoverflow.com/questions/3809401/what-is-a-good-regular-expression-to-match-a-url
@ -222,9 +223,10 @@ def gen_task_list(mwe_pass='first',
files = os.listdir(input_parquet)
with open("tf_task_list",'w') as outfile:
curdir = Path('.')
for f in files:
if f.endswith(".parquet"):
outfile.write(f"./term_frequencies.py weekly_tf {f} --mwe-pass {mwe_pass} --input-parquet {input_parquet} --output-01p-sample-path {output_10p_sample_path} --temp-output-tfidf-path {temp_output_tfidf_path} --output-terms-path {output_terms_path} --output-authors-path {output_terms_path} --dataset {dataset}\n")
outfile.write(f"{str(curdir.absolute())}/term_frequencies.py weekly_tf {f} --mwe-pass {mwe_pass} --input-parquet {input_parquet} --output-01p-sample-path {output_10p_sample_path} --temp-output-tfidf-path {temp_output_tfidf_path} --output-terms-path {output_terms_path} --output-authors-path {output_terms_path} --dataset {dataset}\n")
if __name__ == "__main__":
fire.Fire({"gen_task_list":gen_task_list,