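Decrease the moved-paragraph detection cutoff (200000 → 2000) and construct the differ once, before the batch loop, instead of once per revision text, to see if that fixes the memory issue.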
This commit is contained in:
parent
076df15740
commit
83c92d1a37
@ -489,6 +489,12 @@ class WikiqParser:
|
|||||||
|
|
||||||
persist_state = persistence.State()
|
persist_state = persistence.State()
|
||||||
|
|
||||||
|
|
||||||
|
if self.diff:
|
||||||
|
differ = pywikidiff2.pywikidiff2(
|
||||||
|
numContextLines=1000000, moved_paragraph_detection_cutoff=2000
|
||||||
|
)
|
||||||
|
|
||||||
while not on_last_batch:
|
while not on_last_batch:
|
||||||
# first loop: next_batch <- batch;
|
# first loop: next_batch <- batch;
|
||||||
# second loop: next_batch <- batch; evaluate next_batch.
|
# second loop: next_batch <- batch; evaluate next_batch.
|
||||||
@ -649,11 +655,6 @@ class WikiqParser:
|
|||||||
last_text = last_rev_text
|
last_text = last_rev_text
|
||||||
new_diffs = []
|
new_diffs = []
|
||||||
for text in row_buffer["text"]:
|
for text in row_buffer["text"]:
|
||||||
|
|
||||||
differ = pywikidiff2.pywikidiff2(
|
|
||||||
numContextLines=1000000, moved_paragraph_detection_cutoff=200000
|
|
||||||
)
|
|
||||||
|
|
||||||
new_diffs.append(differ.inline_json_diff(last_text, text))
|
new_diffs.append(differ.inline_json_diff(last_text, text))
|
||||||
last_text = text
|
last_text = text
|
||||||
row_buffer["diff"] = [
|
row_buffer["diff"] = [
|
||||||
|
Loading…
Reference in New Issue
Block a user