bugfix.
This commit is contained in:
parent
b25c332cea
commit
224fb89317
@ -248,9 +248,6 @@ def sort_tf(input_parquet="/gscratch/comdata/output/temp_reddit_comments_by_subr
|
||||
from pyspark.sql import functions as f
|
||||
from pyspark.sql import SparkSession
|
||||
spark = SparkSession.builder.config(map={'spark.executor.memory':'900g'}).getOrCreate()
|
||||
|
||||
|
||||
getOrCreate()
|
||||
df = spark.read.parquet(input_parquet)
|
||||
df = df.repartition(2000,tf_name)
|
||||
df = df.sort([tf_name,'week','subreddit'])
|
||||
|
Loading…
Reference in New Issue
Block a user