From e2b6c1b4819dcc1afcecd75a528272dd19f2cddf Mon Sep 17 00:00:00 2001 From: Nathan TeBlunthuis Date: Thu, 12 Dec 2024 07:17:10 -0800 Subject: [PATCH] configure to use the g2-cpu node. --- similarities/tfidf.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/similarities/tfidf.py b/similarities/tfidf.py index 6695c57..89e529b 100755 --- a/similarities/tfidf.py +++ b/similarities/tfidf.py @@ -5,8 +5,7 @@ from pyspark.sql import functions as f from similarities_helper import build_tfidf_dataset, build_weekly_tfidf_dataset, select_topN_subreddits def _tfidf_wrapper(func, inpath, outpath, topN, term_colname, exclude, included_subreddits): - spark = SparkSession.builder.getOrCreate() - + spark = SparkSession.builder.config(map={'spark.executor.memory':'900g','spark.executor.cores':128}).getOrCreate() df = spark.read.parquet(inpath) df = df.filter(~ f.col(term_colname).isin(exclude))