decrease moved paragraph detection cutoff to see if that fixes the memory issue.
commit 83c92d1a37
parent 076df15740
@@ -489,6 +489,12 @@ class WikiqParser:
 
         persist_state = persistence.State()
 
+
+        if self.diff:
+            differ = pywikidiff2.pywikidiff2(
+                numContextLines=1000000, moved_paragraph_detection_cutoff=2000
+            )
+
         while not on_last_batch:
             # first loop: next_batch <- batch;
             # second loop: next_batch <- batch; evaluate next_batch.
@@ -649,11 +655,6 @@ class WikiqParser:
             last_text = last_rev_text
             new_diffs = []
             for text in row_buffer["text"]:
-
-                differ = pywikidiff2.pywikidiff2(
-                    numContextLines=1000000, moved_paragraph_detection_cutoff=200000
-                )
-
                 new_diffs.append(differ.inline_json_diff(last_text, text))
                 last_text = text
             row_buffer["diff"] = [
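Net effect of the change, as a minimal sketch: the pywikidiff2 differ is now constructed once, before the batch loop, with moved_paragraph_detection_cutoff lowered from 200000 to 2000, instead of being re-instantiated for every revision text inside the per-text loop. Only the differ construction and the inline_json_diff loop below come from the hunks above; the diff_batch wrapper and its arguments are illustrative assumptions, not code from wikiq.

import pywikidiff2

# Built once, outside the batch loop, with the lowered cutoff (was 200000).
differ = pywikidiff2.pywikidiff2(
    numContextLines=1000000, moved_paragraph_detection_cutoff=2000
)

def diff_batch(row_buffer, last_rev_text):
    # Hypothetical helper: diff each revision text against its predecessor,
    # reusing the shared differ rather than building a new one per text.
    last_text = last_rev_text
    new_diffs = []
    for text in row_buffer["text"]:
        new_diffs.append(differ.inline_json_diff(last_text, text))
        last_text = text
    return new_diffs

Reusing one differ avoids repeatedly allocating the diff engine per revision, which, together with the smaller moved-paragraph cutoff, is what this commit tries in order to reduce memory use.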