13
0
cdsc_reddit/similarities/job_script.sh

7 lines
313 B
Bash
Raw Normal View History

2020-12-09 01:32:20 +00:00
#!/usr/bin/bash
2023-05-24 00:18:19 +00:00
# Batch (non-interactive) bash does not read ~/.bashrc on its own, so load it
# explicitly. NOTE(review): presumably this is what puts the Spark helper
# scripts and spark-submit on PATH -- confirm against the user's ~/.bashrc.
source ~/.bashrc

# Record which node the job landed on in the job log. Calling hostname
# directly avoids the redundant, unquoted `echo $(...)` wrapper.
hostname
2020-12-09 01:32:20 +00:00
# Run the start_spark_cluster.sh helper (looked up via PATH).
# NOTE(review): presumably this brings up the standalone Spark master/workers
# that the spark-submit call in this script connects to -- confirm against
# that helper. Its exit status is not checked here.
start_spark_cluster.sh
2023-05-24 00:18:19 +00:00
# Submit tfidf.py (subcommand "authors") to the Spark master addressed as
# this host on port 43015. The script and data paths are relative, so the
# job must be launched from the directory that contains tfidf.py.
spark_master="spark://$(hostname):43015"
spark-submit --verbose --master "${spark_master}" \
  tfidf.py authors \
  --topN=100000 \
  --inpath=../../data/reddit_ngrams/comment_authors_sorted.parquet \
  --outpath=../../data/reddit_similarity/tfidf/comment_authors_100k.parquet
submit_status=$?

# Always run the teardown helper, even when the submission failed.
# NOTE(review): stop-all.sh is presumably Spark's standalone stop script --
# confirm which stop-all.sh is first on PATH.
stop-all.sh

# Exit with the job's status rather than stop-all.sh's, so the scheduler
# does not record a failed spark-submit as a successful run.
exit "${submit_status}"