From 224fb89317578dd842146085738e1ed92b6fda4a Mon Sep 17 00:00:00 2001 From: Nathan TeBlunthuis Date: Sun, 1 Dec 2024 15:28:25 -0800 Subject: [PATCH] bugfix. --- ngrams/term_frequencies.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/ngrams/term_frequencies.py b/ngrams/term_frequencies.py index 9a6eb25..c410462 100755 --- a/ngrams/term_frequencies.py +++ b/ngrams/term_frequencies.py @@ -248,9 +248,6 @@ def sort_tf(input_parquet="/gscratch/comdata/output/temp_reddit_comments_by_subr from pyspark.sql import functions as f from pyspark.sql import SparkSession spark = SparkSession.builder.config(map={'spark.executor.memory':'900g'}).getOrCreate() - - -getOrCreate() df = spark.read.parquet(input_parquet) df = df.repartition(2000,tf_name) df = df.sort([tf_name,'week','subreddit'])