Configure the Spark session to use the g2-cpu node.
This commit is contained in:
parent
f38ec6c129
commit
e2b6c1b481
@@ -5,8 +5,7 @@ from pyspark.sql import functions as f
|
||||
from similarities_helper import build_tfidf_dataset, build_weekly_tfidf_dataset, select_topN_subreddits
|
||||
|
||||
def _tfidf_wrapper(func, inpath, outpath, topN, term_colname, exclude, included_subreddits):
|
||||
spark = SparkSession.builder.getOrCreate()
|
||||
|
||||
spark = SparkSession.builder.config(map={'spark.executor.memory':'900g','spark.executor.cores':128}).getOrCreate()
|
||||
df = spark.read.parquet(inpath)
|
||||
|
||||
df = df.filter(~ f.col(term_colname).isin(exclude))
|
||||
|
Loading…
Reference in New Issue
Block a user