diff --git a/ngrams/term_frequencies.py b/ngrams/term_frequencies.py
index 741e914..1bb0f70 100755
--- a/ngrams/term_frequencies.py
+++ b/ngrams/term_frequencies.py
@@ -14,6 +14,9 @@ from nltk.util import ngrams
 import string
 from random import random
 from pathlib import Path
+# Raise the max heap (-Xmx) for any JVM launched from this process; the JVM
+# reads _JAVA_OPTIONS at startup, so this must run before one is started.
+os.environ["_JAVA_OPTIONS"] = "-Xmx920g"
 
 # remove urls
 # taken from https://stackoverflow.com/questions/3809401/what-is-a-good-regular-expression-to-match-a-url