bugfix.
This commit is contained in:
parent
73dd2a96a6
commit
a179d608eb
@ -58,6 +58,7 @@ def my_tokenizer(text, mwe_pass, mwe_tokenize, stopWords):
|
||||
# they say that they extract 1-4 grams from 10% of the sentences and then find phrases that appear often relative to the original terms
|
||||
# here we take a 10 percent sample of sentences
|
||||
if mwe_pass == 'first':
|
||||
ngram_output = partition.replace("parquet","txt")
|
||||
sentences = list(sentences)
|
||||
for sentence in sentences:
|
||||
if random() <= 0.1:
|
||||
|
Loading…
Reference in New Issue
Block a user