#!/usr/bin/env python3
"""Part 2 for submissions: Spark sort + repartition into the final datasets.

Must be launched from a login node via the Hyak-provided wrapper:

    start_spark_and_run.sh 1 submissions_part2.py
    start_spark_and_run.sh 1 submissions_part2.py --indir=/path/to/parquets --mode=append

--indir defaults to the temp submissions dir in dumps_helper.py.
--mode defaults to 'overwrite'; use 'append' to add a new layer without
touching existing partition files (see add_months.sh).
"""

import fire

from dumps_helper import SUBMISSIONS, sort_and_write


def main(indir=None, mode='overwrite'):
    """Sort and repartition the submissions parquets into the final dataset.

    Thin command-line wrapper that forwards its arguments to
    ``dumps_helper.sort_and_write`` for the ``SUBMISSIONS`` dataset.

    Args:
        indir: Directory holding the input parquets. ``None`` (the default)
            is passed through unchanged so that ``sort_and_write`` picks its
            own default input directory.
        mode: Write mode forwarded to ``sort_and_write``. Defaults to
            ``'overwrite'``; pass ``'append'`` to add a new layer without
            touching existing partition files.

    Returns:
        Whatever ``sort_and_write`` returns. Fire prints a non-None result,
        so it is returned rather than discarded.
    """
    return sort_and_write(SUBMISSIONS, indir=indir, mode=mode)


if __name__ == "__main__":
    # A named function (rather than a lambda) lets Fire show a real command
    # name and the docstring above in its --help output.
    fire.Fire(main)