#!/usr/bin/env python3
"""Part 1 for submissions: parse one RS_*.zst dump into a parquet file.

CLI:
    submissions_part1.py parse_dump RS_2018-08.zst
    submissions_part1.py gen_task_list
    submissions_part1.py parse_dump RS_2018-08.zst --dumpdir=/tmp/in --outdir=/tmp/out
"""
import fire

from dumps_helper import SUBMISSIONS, parse_dump, gen_task_list


def _parse_dump(partition, dumpdir=None, outdir=None):
    """Run the shared dump parser on one submissions partition.

    Thin CLI adapter: forwards everything to ``dumps_helper.parse_dump``
    with the ``SUBMISSIONS`` kind selected.

    Args:
        partition: Dump to process, e.g. ``RS_2018-08.zst``.
        dumpdir: Forwarded unchanged; presumably the directory holding the
            input dumps -- confirm against ``dumps_helper``.
        outdir: Forwarded unchanged; presumably the directory receiving the
            output -- confirm against ``dumps_helper``.
    """
    # ``None`` is passed through as-is; any defaulting happens in the helper.
    parse_dump(
        SUBMISSIONS,
        partition,
        dumpdir=dumpdir,
        outdir=outdir,
    )


def _gen_task_list(dumpdir=None, tasklist=None):
    """Generate the task list for this script via the shared helper.

    Thin CLI adapter: forwards to ``dumps_helper.gen_task_list`` with the
    ``SUBMISSIONS`` kind and this script's file name.

    Args:
        dumpdir: Forwarded unchanged; presumably the directory scanned for
            dumps -- confirm against ``dumps_helper``.
        tasklist: Forwarded unchanged; presumably where the task list is
            written -- confirm against ``dumps_helper``.
    """
    # The helper is told which script the generated tasks should refer to.
    script_name = 'submissions_part1.py'
    gen_task_list(
        SUBMISSIONS,
        script_name,
        dumpdir=dumpdir,
        tasklist=tasklist,
    )


if __name__ == "__main__":
    # Sub-command name on the command line -> function that implements it.
    commands = {
        'parse_dump': _parse_dump,
        'gen_task_list': _gen_task_list,
    }
    fire.Fire(commands)