cdsc_reddit/datasets/submissions_2_parquet.sh

#!/usr/bin/env bash
## Runs the two parts of the submissions-to-parquet job in order:
##   1. submissions_2_parquet_part1.py gen_task_list, launched through srun
##   2. submissions_2_parquet_part2.py, launched through start_spark_and_run.sh
## Both Python scripts are resolved relative to the current working directory,
## so invoke this script from the directory that contains them.
##
## this should be run manually since we don't have a nice way to wait on parallel_sql jobs

# Stop at the first failing command so part 2 is never started after a
# failed part 1, and treat unset variables as errors.
set -euo pipefail

# Capture the working directory once; quoting it below keeps paths that
# contain spaces or glob characters intact as a single argument.
workdir=$(pwd)

# Part 1: one node on the compute-bigmem partition under the comdata account,
# 40 CPUs per task, 9g of memory per CPU, 120-hour time limit.
srun -p compute-bigmem -A comdata --nodes=1 --mem-per-cpu=9g -c 40 --time=120:00:00 \
  python3 "${workdir}/submissions_2_parquet_part1.py" gen_task_list

# Part 2.
# NOTE(review): the meaning of the leading "1" argument is defined by
# start_spark_and_run.sh, which is not visible here -- confirm before changing.
start_spark_and_run.sh 1 "${workdir}/submissions_2_parquet_part2.py"
git-annex in 2022-04-06 18:11:11 +00:00			`#!/usr/bin/env bash`
Update submissions to parse using the backfill queue. 2020-08-12 05:37:36 +00:00			`## this should be run manually since we don't have a nice way to wait on parallel_sql jobs`
Script to run both parts of submissions_2_parquet.sh 2020-07-07 06:27:18 +00:00

git-annex in 2022-04-06 18:11:11 +00:00			`srun -p compute-bigmem -A comdata --nodes=1 --mem-per-cpu=9g -c 40 --time=120:00:00 python3 $(pwd)/submissions_2_parquet_part1.py gen_task_list`
Script to run both parts of submissions_2_parquet.sh 2020-07-07 06:27:18 +00:00
			`start_spark_and_run.sh 1 $(pwd)/submissions_2_parquet_part2.py`