18
0

Some improvements to run affinity clustering on a larger dataset and compute density.
This commit is contained in:
Nate E TeBlunthuis
2020-12-12 20:42:47 -08:00
parent e6294b5b90
commit 56269deee3
15 changed files with 84 additions and 84 deletions

View File

@@ -45,7 +45,7 @@ def tfidf_terms(outpath='/gscratch/comdata/output/reddit_similarity/tfidf/commen
[]
)
def tfidf_authors_weekly(outpath='/gscratch/comdata/output/reddit_similarity/tfidf/comment_authors.parquet',
def tfidf_authors_weekly(outpath='/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_authors.parquet',
topN=25000):
return tfidf_weekly("/gscratch/comdata/output/reddit_ngrams/comment_authors.parquet",
@@ -55,7 +55,7 @@ def tfidf_authors_weekly(outpath='/gscratch/comdata/output/reddit_similarity/tfi
['[deleted]','AutoModerator']
)
def tfidf_terms_weekly(outpath='/gscratch/comdata/output/reddit_similarity/tfidf/comment_terms.parquet',
def tfidf_terms_weekly(outpath='/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_terms.parquet',
topN=25000):
return tfidf_weekly("/gscratch/comdata/output/reddit_ngrams/comment_terms.parquet",