18
0

Updating to support Wang-style user overlaps.

This commit is contained in:
Nate E TeBlunthuis
2020-12-24 22:38:04 -08:00
parent 56269deee3
commit 4e20dce188
11 changed files with 193 additions and 70 deletions

View File

@@ -35,7 +35,7 @@ def cosine_similarities_weekly(tfidf_path, outfile, term_colname, min_df = None,
subreddit_names['subreddit_id_new'] = subreddit_names['subreddit_id_new'] - 1
spark.stop()
d weeks = sorted(list(subreddit_names.week.drop_duplicates()))
weeks = sorted(list(subreddit_names.week.drop_duplicates()))
for week in weeks:
print(f"loading matrix: {week}")
mat = read_tfidf_matrix_weekly(tempdir.name, term_colname, week)