#!/usr/bin/env python3
"""Submissions pipeline, part 2: Spark sort and repartition.

Consumes the per-source parquet files written by submissions_part1.py and
sorts/repartitions them into the final by_subreddit and by_author datasets.

Run it through the Hyak-provided start_spark_and_run.sh wrapper:

    start_spark_and_run.sh 1 submissions_part2.py
"""

from dumps_helper import SUBMISSIONS, sort_and_write

if __name__ == "__main__":
    # All of the work is delegated to the shared helper; this script only
    # selects the submissions configuration to run it with.
    sort_and_write(SUBMISSIONS)