diff --git a/similarities/weekly_cosine_similarities.py b/similarities/weekly_cosine_similarities.py index 791fe44..af545f3 100755 --- a/similarities/weekly_cosine_similarities.py +++ b/similarities/weekly_cosine_similarities.py @@ -46,7 +46,9 @@ def _week_similarities(week, simfunc, tfidf_path, term_colname, included_subredd tfidf_colname='tf_idf' # if the max subreddit id we found is less than the number of subreddit names then we have to fill in 0s - mat = csr_matrix((entries[tfidf_colname],(entries[term_id_new]-1, entries.subreddit_id_new-1)),shape=(nterms,subreddit_names.shape[0])) + shape = (nterms,subreddit_names.shape[0]) + print(shape) + mat = csr_matrix((entries[tfidf_colname],(entries[term_id_new]-1, entries.subreddit_id_new-1)),shape=shape) print('computing similarities') print(simfunc) sims = simfunc(mat)