13
0
cdsc_reddit/ngrams/checkpoint_parallelsql.sbatch

27 lines
815 B
Plaintext
Raw Permalink Normal View History

2020-08-09 07:21:50 +00:00
#!/bin/bash
## parallel_sql_job.sh
##
## Slurm batch script that sources ./bin/activate, loads the parallel_sql
## module, prints a few environment diagnostics, and then starts a
## parallel-sql worker on the allocated node.
##
## NOTE: keep every #SBATCH directive above the first executable command.
## sbatch stops reading directives as soon as it reaches a non-comment line,
## so anything placed between them silently disables the directives below it.
#SBATCH --job-name=tf_subreddit_comments
## Allocation Definition
#SBATCH --account=comdata-ckpt
#SBATCH --partition=ckpt
## Resources
## Nodes. This should always be 1 for parallel-sql.
#SBATCH --nodes=1
## Walltime (12 hours)
#SBATCH --time=12:00:00
## Memory per node
#SBATCH --mem=32G
## CPUs for the single task; --jobs at the bottom uses the same number.
#SBATCH --cpus-per-task=4
#SBATCH --ntasks=1
## Working directory of the job; the relative path sourced below is
## resolved against it.
#SBATCH -D /gscratch/comdata/users/nathante/cdsc-reddit

# Activate the environment shipped in the working directory (presumably a
# Python virtual environment -- confirm). Stop here if that fails, so the
# worker does not pull tasks and run them with the wrong interpreter.
if ! source ./bin/activate; then
  echo "error: could not source ./bin/activate (cwd: ${PWD})" >&2
  exit 1
fi

module load parallel_sql

# Diagnostics written to the job log: which perl and python3 are on PATH and
# which pyarrow build conda reports. These are informational only and do not
# abort the job.
command -v perl || echo "perl not found on PATH" >&2
conda list pyarrow
command -v python3 || echo "python3 not found on PATH" >&2

#Put here commands to load other modules (e.g. matlab etc.)
#Below command means that parallel_sql will get tasks from the database
#and run them on the node (in parallel). --jobs 4 is the same number as
#--cpus-per-task above, so presumably up to 4 tasks run at one time;
#change both together.
parallel-sql --sql -a parallel --exit-on-term --jobs 4