1
0

change path

This commit is contained in:
Nathan TeBlunthuis 2024-12-06 08:18:20 -08:00
parent c3d2834110
commit 25bfc57baf

View File

@@ -177,7 +177,7 @@ def weekly_tf(partition,
subreddit_weeks = groupby(rows, lambda r: (r.subreddit, r.week)) subreddit_weeks = groupby(rows, lambda r: (r.subreddit, r.week))
if mwe_pass != 'first': if mwe_pass != 'first':
mwe_dataset = pd.read_feather(f'/gscratch/comdata/output/reddit_ngrams/multiword_expressions.feather') mwe_dataset = pd.read_feather(f'/gscratch/comdata/output/reddit_ngrams/reddit_multiword_expressions.feather')
mwe_dataset = mwe_dataset.sort_values(['phrasePWMI'],ascending=False) mwe_dataset = mwe_dataset.sort_values(['phrasePWMI'],ascending=False)
mwe_phrases = list(mwe_dataset.phrase) mwe_phrases = list(mwe_dataset.phrase)
mwe_phrases = [tuple(s.split(' ')) for s in mwe_phrases] mwe_phrases = [tuple(s.split(' ')) for s in mwe_phrases]