From 88fca0f82b6bdef75e5536969b7ed52c8c538cd8 Mon Sep 17 00:00:00 2001 From: Nathan TeBlunthuis Date: Sun, 1 Dec 2024 09:55:12 -0800 Subject: [PATCH] allow posts schemas to be nullable. --- ngrams/term_frequencies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ngrams/term_frequencies.py b/ngrams/term_frequencies.py index 1ca68ca..cd89c8e 100755 --- a/ngrams/term_frequencies.py +++ b/ngrams/term_frequencies.py @@ -126,7 +126,7 @@ def weekly_tf(partition, nullable_schema = False elif reddit_dataset == 'posts': tf_func = tf_posts - nullable_schema = False + nullable_schema = True dataset = ds.dataset(f"{input_parquet}/{partition}", format='parquet') if not os.path.exists(output_10p_sample_path):