Changes from hyak.
This commit is contained in:
@@ -1,7 +1,10 @@
|
||||
all: /gscratch/comdata/output/reddit_density/comment_terms_10000.feather /gscratch/comdata/output/reddit_density/comment_authors_10000.feather
|
||||
all: /gscratch/comdata/output/reddit_density/comment_terms_10000.feather /gscratch/comdata/output/reddit_density/comment_authors_10000.feather /gscratch/comdata/output/reddit_density/subreddit_author_tf_similarities_10000.feather
|
||||
|
||||
/gscratch/comdata/output/reddit_density/comment_terms_10000.feather:overlap_density.py /gscratch/comdata/output/reddit_similarity/comment_terms_10000.feather /gscratch/comdata/output/reddit_similarity/comment_terms_10000.feather
|
||||
python3 overlap_density.py terms --inpath="/gscratch/comdata/output/reddit_similarity/comment_terms_10000.feather" --outpath="/gscratch/comdata/output/reddit_density/comment_terms_10000.feather" --agg=pd.DataFrame.sum
|
||||
start_spark_and_run.sh 1 overlap_density.py terms --inpath="/gscratch/comdata/output/reddit_similarity/comment_terms_10000.feather" --outpath="/gscratch/comdata/output/reddit_density/comment_terms_10000.feather" --agg=pd.DataFrame.sum
|
||||
|
||||
/gscratch/comdata/output/reddit_density/comment_authors_10000.feather:overlap_density.py /gscratch/comdata/output/reddit_similarity/comment_authors_10000.feather
|
||||
python3 overlap_density.py authors --inpath="/gscratch/comdata/output/reddit_similarity/comment_authors_10000.feather" --outpath="/gscratch/comdata/output/reddit_density/comment_authors_10000.feather" --agg=pd.DataFrame.sum
|
||||
start_spark_and_run.sh 1 overlap_density.py authors --inpath="/gscratch/comdata/output/reddit_similarity/comment_authors_10000.feather" --outpath="/gscratch/comdata/output/reddit_density/comment_authors_10000.feather" --agg=pd.DataFrame.sum
|
||||
|
||||
/gscratch/comdata/output/reddit_density/subreddit_author_tf_similarities_10000.feather: overlap_density.py /gscratch/comdata/output/reddit_similarity/subreddit_author_tf_similarities_10000.parquet
|
||||
start_spark_and_run.sh 1 overlap_density.py authors --inpath="/gscratch/comdata/output/reddit_similarity/subreddit_author_tf_similarities_10000.parquet" --outpath="/gscratch/comdata/output/reddit_density/subreddit_author_tf_similarities_10000.feather" --agg=pd.DataFrame.sum
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/usr/bin/bash
|
||||
start_spark_cluster.sh
|
||||
spark-submit --master spark://$(hostname):18899 overlap_density.py wang_overlaps --inpath=/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_authors.parquet --to_date=2020-04-13
|
||||
spark-submit --master spark://$(hostname):18899 overlap_density.py authors --inpath=/gscratch/comdata/output/reddit_similarity/comment_authors_10000.feather --outpath=/gscratch/comdata/output/reddit_density/comment_authors_10000.feather --agg=pd.DataFrame.sum
|
||||
stop-all.sh
|
||||
|
||||
@@ -5,7 +5,7 @@ import numpy as np
|
||||
import sys
|
||||
sys.path.append("..")
|
||||
sys.path.append("../similarities")
|
||||
from similarities.similarities_helper import read_tfidf_matrix, reindex_tfidf, reindex_tfidf_time_interval
|
||||
from similarities.similarities_helper import reindex_tfidf, reindex_tfidf_time_interval
|
||||
|
||||
# this is the mean of the ratio of the overlap to the focal size.
|
||||
# mean shared membership per focal community member
|
||||
@@ -72,5 +72,5 @@ if __name__ == "__main__":
|
||||
fire.Fire({'authors':author_overlap_density,
|
||||
'terms':term_overlap_density,
|
||||
'author_weekly':author_overlap_density_weekly,
|
||||
'term_weekly':term_overlap_density_weekly,
|
||||
'wang_overlaps':wang_overlap_density})
|
||||
'term_weekly':term_overlap_density_weekly})
|
||||
|
||||
|
||||
Reference in New Issue
Block a user