From 063125695609171475d996a235ed725c77cb6853 Mon Sep 17 00:00:00 2001 From: Nathan TeBlunthuis Date: Wed, 27 Nov 2024 19:06:24 -0800 Subject: [PATCH] make the output directory. --- ngrams/term_frequencies.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ngrams/term_frequencies.py b/ngrams/term_frequencies.py index b6e0550..b490e42 100755 --- a/ngrams/term_frequencies.py +++ b/ngrams/term_frequencies.py @@ -187,6 +187,8 @@ def weekly_tf(partition, outchunksize = 10000 + Path(output_terms_path).mkdir(parents=True, exist_ok=True) + with pq.ParquetWriter(f"{output_terms_path}/{partition}",schema=schema,compression='snappy',flavor='spark') as writer, pq.ParquetWriter(f"{output_authors_path}/{partition}",schema=author_schema,compression='snappy',flavor='spark') as author_writer: while True: