1
0
This commit is contained in:
Nathan TeBlunthuis 2024-11-27 18:56:22 -08:00
parent 78eb16f4d6
commit 587e1c0022

View File

@@ -79,7 +79,10 @@ def weekly_tf(partition,
if os.path.exists(f"{output_10p_sample_path}/{ngram_output}"):
os.remove(f"{output_10p_sample_path}/{ngram_output}")
if reddit_dataset == 'comments':
batches = dataset.to_batches(columns=['CreatedAt','subreddit','body','author'])
if reddit_dataset == 'posts':
batches = dataset.to_batches(columns=['CreatedAt','subreddit','title','author'])
schema = pa.schema([pa.field('subreddit', pa.string(), nullable=False),
pa.field('term', pa.string(), nullable=False),