pass clusters param through
This commit is contained in:
parent
dba0faf125
commit
4168d0d4cf
@ -191,7 +191,9 @@ def write_weekly_similarities(path, sims, week, names, clusters=None):
|
|||||||
|
|
||||||
if clusters is not None:
|
if clusters is not None:
|
||||||
cluster_sims = duckdb.sql("SELECT sims.* FROM sims SEMI JOIN clusters ON _subreddit == sr_i AND variable == sr_j").df()
|
cluster_sims = duckdb.sql("SELECT sims.* FROM sims SEMI JOIN clusters ON _subreddit == sr_i AND variable == sr_j").df()
|
||||||
|
else:
|
||||||
|
cluster_sims = sims
|
||||||
|
|
||||||
cluster_sims.to_parquet(p / week)
|
cluster_sims.to_parquet(p / week)
|
||||||
|
|
||||||
def column_overlaps(mat):
|
def column_overlaps(mat):
|
||||||
|
@ -116,7 +116,7 @@ def cosine_similarities_weekly(tfidf_path, outfile, term_colname, included_subre
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
def author_cosine_similarities_weekly(outfile, infile='/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_authors_test.parquet', min_df=2, max_df=None, included_subreddits=None, topN=500, static_tfidf_path=None):
|
def author_cosine_similarities_weekly(outfile, infile='/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_authors_test.parquet', min_df=2, max_df=None, included_subreddits=None, topN=500, static_tfidf_path=None, clusters=None):
|
||||||
return cosine_similarities_weekly(infile,
|
return cosine_similarities_weekly(infile,
|
||||||
outfile,
|
outfile,
|
||||||
'author',
|
'author',
|
||||||
@ -125,10 +125,11 @@ def author_cosine_similarities_weekly(outfile, infile='/gscratch/comdata/output/
|
|||||||
topN,
|
topN,
|
||||||
min_df=min_df,
|
min_df=min_df,
|
||||||
max_df=max_df,
|
max_df=max_df,
|
||||||
static_tfidf_path=static_tfidf_path
|
static_tfidf_path=static_tfidf_path,
|
||||||
|
clusters=clusters
|
||||||
)
|
)
|
||||||
|
|
||||||
def term_cosine_similarities_weekly(outfile, infile='/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_terms.parquet', min_df=None, max_df=None, included_subreddits=None, topN=None, static_tfidf_path=None):
|
def term_cosine_similarities_weekly(outfile, infile='/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_terms.parquet', min_df=None, max_df=None, included_subreddits=None, topN=None, static_tfidf_path=None, clusters=None):
|
||||||
return cosine_similarities_weekly(infile,
|
return cosine_similarities_weekly(infile,
|
||||||
outfile,
|
outfile,
|
||||||
'term',
|
'term',
|
||||||
@ -136,10 +137,11 @@ def term_cosine_similarities_weekly(outfile, infile='/gscratch/comdata/output/re
|
|||||||
max_df,
|
max_df,
|
||||||
included_subreddits,
|
included_subreddits,
|
||||||
topN,
|
topN,
|
||||||
static_tfidf_path=static_tfidf_path)
|
static_tfidf_path=static_tfidf_path,
|
||||||
|
clusters=clusters)
|
||||||
|
|
||||||
|
|
||||||
def author_cosine_similarities_weekly_lsi(outfile, infile = '/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_authors_test.parquet', included_subreddits=None, n_components=100,lsi_model=None,static_tfidf_path=None, min_df=2):
|
def author_cosine_similarities_weekly_lsi(outfile, infile = '/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_authors_test.parquet', included_subreddits=None, n_components=100,lsi_model=None,static_tfidf_path=None, min_df=2, clusters=None):
|
||||||
return cosine_similarities_weekly_lsi(infile,
|
return cosine_similarities_weekly_lsi(infile,
|
||||||
outfile,
|
outfile,
|
||||||
'author',
|
'author',
|
||||||
@ -147,18 +149,20 @@ def author_cosine_similarities_weekly_lsi(outfile, infile = '/gscratch/comdata/o
|
|||||||
n_components=n_components,
|
n_components=n_components,
|
||||||
lsi_model=lsi_model,
|
lsi_model=lsi_model,
|
||||||
static_tfidf_path=static_tfidf_path,
|
static_tfidf_path=static_tfidf_path,
|
||||||
min_df=min_df
|
min_df=min_df,
|
||||||
|
clusters=clusters
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def term_cosine_similarities_weekly_lsi(outfile, infile = '/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_terms.parquet', included_subreddits=None, n_components=100,lsi_model=None,static_tfidf_path=None):
|
def term_cosine_similarities_weekly_lsi(outfile, infile = '/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_terms.parquet', included_subreddits=None, n_components=100,lsi_model=None,static_tfidf_path=None,clusters=None):
|
||||||
return cosine_similarities_weekly_lsi(infile,
|
return cosine_similarities_weekly_lsi(infile,
|
||||||
outfile,
|
outfile,
|
||||||
'term',
|
'term',
|
||||||
included_subreddits=included_subreddits,
|
included_subreddits=included_subreddits,
|
||||||
n_components=n_components,
|
n_components=n_components,
|
||||||
lsi_model=lsi_model,
|
lsi_model=lsi_model,
|
||||||
static_tfidf_path=static_tfidf_path
|
static_tfidf_path=static_tfidf_path,
|
||||||
|
clusters=clusters
|
||||||
)
|
)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
Loading…
Reference in New Issue
Block a user