#!/usr/bin/env python3
"""Part 1 of the comments pipeline: parse one RC_*.zst dump into a parquet file.

Command-line usage:

    comments_part1.py parse_dump RC_2018-08.zst
    comments_part1.py gen_task_list
    comments_part1.py parse_dump RC_2018-08.zst --dumpdir=/tmp/in --outdir=/tmp/out
"""

import fire

from dumps_helper import COMMENTS, parse_dump, gen_task_list


def _parse_dump(partition, dumpdir=None, outdir=None):
    """Run the shared ``parse_dump`` helper for the comments dump kind.

    Args:
        partition: Dump to process, e.g. ``RC_2018-08.zst``.
        dumpdir: Optional input directory, forwarded unchanged to the helper.
        outdir: Optional output directory, forwarded unchanged to the helper.

    The helper's return value is deliberately discarded, so this command
    always returns ``None``.
    """
    parse_dump(
        COMMENTS,
        partition,
        dumpdir=dumpdir,
        outdir=outdir,
    )


def _gen_task_list(dumpdir=None, tasklist=None):
    """Run the shared ``gen_task_list`` helper for the comments dump kind.

    Args:
        dumpdir: Optional dump directory, forwarded unchanged to the helper.
        tasklist: Optional task-list argument, forwarded unchanged to the
            helper (presumably the output path -- confirm in dumps_helper).

    The helper's return value is deliberately discarded, so this command
    always returns ``None``.
    """
    # The script name is handed to the helper; presumably it is embedded in
    # the generated task entries -- confirm against dumps_helper.
    gen_task_list(
        COMMENTS,
        'comments_part1.py',
        dumpdir=dumpdir,
        tasklist=tasklist,
    )


# Sub-command name -> implementation, as exposed on the command line.
_COMMANDS = {
    'parse_dump': _parse_dump,
    'gen_task_list': _gen_task_list,
}


if __name__ == "__main__":
    fire.Fire(_COMMANDS)