#!/usr/bin/env python3
"""Comments pipeline, part 2: Spark sort and repartition.

Takes the per-source parquets produced by comments_part1.py and sorts and
repartitions them into the final by_subreddit / by_author datasets.

Launched via the Hyak-provided start_spark_and_run.sh wrapper:

    start_spark_and_run.sh 1 comments_part2.py
"""

from dumps_helper import COMMENTS, sort_and_write


def main() -> None:
    """Hand the COMMENTS descriptor to the shared sort_and_write helper."""
    # All of the actual work lives in dumps_helper; this script only selects
    # COMMENTS as the thing to process.
    sort_and_write(COMMENTS)


if __name__ == "__main__":
    main()