diff --git a/reddit_bz2_2parquet.py b/comments_2_parquet.py similarity index 98% rename from reddit_bz2_2parquet.py rename to comments_2_parquet.py index 93c3d45..3042f58 100755 --- a/reddit_bz2_2parquet.py +++ b/comments_2_parquet.py @@ -6,7 +6,7 @@ from pyspark.sql.types import * from pyspark import SparkConf, SparkContext from pyspark.sql import SparkSession, SQLContext -conf = SparkConf().setAppName("Reddit to bz2") +conf = SparkConf().setAppName("Reddit comments to parquet") conf = conf.set('spark.sql.crossJoin.enabled',"true") spark = SparkSession.builder.getOrCreate()