Refactor and reorganize.
This commit is contained in:
26
datasets/checkpoint_parallelsql.sbatch
Normal file
26
datasets/checkpoint_parallelsql.sbatch
Normal file
@@ -0,0 +1,26 @@
|
||||
#!/bin/bash
|
||||
## checkpoint_parallelsql.sbatch
|
||||
#SBATCH --job-name=tf_subreddit_comments
|
||||
## Allocation Definition
|
||||
#SBATCH --account=comdata-ckpt
|
||||
#SBATCH --partition=ckpt
|
||||
## Resources
|
||||
## Nodes. This should always be 1 for parallel-sql.
|
||||
#SBATCH --nodes=1
|
||||
## Walltime (12 hours)
|
||||
#SBATCH --time=12:00:00
|
||||
## Memory per node
|
||||
#SBATCH --mem=32G
|
||||
#SBATCH --cpus-per-task=4
|
||||
#SBATCH --ntasks=1
|
||||
#SBATCH -D /gscratch/comdata/users/nathante/cdsc-reddit
|
||||
# Activate the project's environment. The path is relative to the job's
# working directory, which is set by the `#SBATCH -D` directive above.
# Abort if activation fails so tasks never run against the wrong environment.
source ./bin/activate || {
  echo "error: could not source ./bin/activate" >&2
  exit 1
}

# Make the parallel-sql command available; nothing below can work without it.
module load parallel_sql || {
  echo "error: could not load module parallel_sql" >&2
  exit 1
}

# Diagnostics only: record in the job log which perl, pyarrow and python3
# this job sees. A missing tool is reported but does not stop the job.
command -v perl || echo "warning: perl not found on PATH" >&2
conda list pyarrow
command -v python3 || echo "warning: python3 not found on PATH" >&2
|
||||
#Put here commands to load other modules (e.g. matlab etc.)
|
||||
# parallel-sql gets tasks from the database and runs them on this node in
# parallel; --jobs sets how many tasks run at one time.
# Slurm exports SLURM_CPUS_PER_TASK when --cpus-per-task is given, so use it
# to keep the number of concurrent tasks in step with the CPUs requested
# above (currently 4). Fall back to 4 if the variable is not set.
parallel-sql --sql -a parallel --exit-on-term --jobs "${SLURM_CPUS_PER_TASK:-4}"
|
||||
Reference in New Issue
Block a user