diff --git a/ngrams/term_frequencies.py b/ngrams/term_frequencies.py index b6e0550..b490e42 100755 --- a/ngrams/term_frequencies.py +++ b/ngrams/term_frequencies.py @@ -187,6 +187,8 @@ def weekly_tf(partition, outchunksize = 10000 + Path(output_terms_path).mkdir(parents=True, exist_ok=True) + with pq.ParquetWriter(f"{output_terms_path}/{partition}",schema=schema,compression='snappy',flavor='spark') as writer, pq.ParquetWriter(f"{output_authors_path}/{partition}",schema=author_schema,compression='snappy',flavor='spark') as author_writer: while True: