1
0

Configure Spark to use the g2-cpu node.

This commit is contained in:
Nathan TeBlunthuis 2024-12-12 07:17:10 -08:00
parent f38ec6c129
commit e2b6c1b481

View File

@@ -5,8 +5,7 @@ from pyspark.sql import functions as f
from similarities_helper import build_tfidf_dataset, build_weekly_tfidf_dataset, select_topN_subreddits
def _tfidf_wrapper(func, inpath, outpath, topN, term_colname, exclude, included_subreddits):
spark = SparkSession.builder.getOrCreate()
spark = SparkSession.builder.config(map={'spark.executor.memory':'900g','spark.executor.cores':128}).getOrCreate()
df = spark.read.parquet(inpath)
df = df.filter(~ f.col(term_colname).isin(exclude))