1
0

correct paths.

This commit is contained in:
Nathan TeBlunthuis 2024-12-04 09:06:02 -08:00
parent 1cca01fb69
commit 85945eae90

View File

@@ -5,8 +5,8 @@ from pyspark.sql import SparkSession
import numpy as np
# Get the active SparkSession, or create one if none exists yet.
spark = SparkSession.builder.getOrCreate()
# NOTE(review): the next two reads are immediately overwritten by the two reads
# that follow them. This looks like the removed and added sides of a diff hunk
# rendered without +/- markers -- confirm which pair actually belongs in the file.
df = spark.read.text("/gscratch/comdata/users/nathante/reddit_comment_ngrams_10p_sample/")
df2 = spark.read.text("/gscratch/comdata/users/nathante/reddit_post_ngrams_10p_sample/")
# Load the two sample directories (comment and post n-grams, judging by the
# path names) as text; each read yields a DataFrame with one "value" column.
df = spark.read.text("/gscratch/comdata/output/reddit_ngrams/reddit_comment_ngrams_10p_sample/")
df2 = spark.read.text("/gscratch/comdata/output/reddit_ngrams/reddit_post_ngrams_10p_sample/")
# Append the rows of df2 to df (union keeps duplicates and matches columns by position).
df = df.union(df2)
# Rename the text reader's "value" column to "phrase".
df = df.withColumnRenamed("value","phrase")