From 6557e25af76f1b70b6ee09544065ad5712521361 Mon Sep 17 00:00:00 2001 From: Nathan TeBlunthuis Date: Tue, 22 Jul 2025 09:50:30 -0700 Subject: [PATCH] make a new pywikidiff2 object for each revision to reduce memory. --- src/wikiq/__init__.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/wikiq/__init__.py b/src/wikiq/__init__.py index cf9d961..25bb8d8 100755 --- a/src/wikiq/__init__.py +++ b/src/wikiq/__init__.py @@ -464,10 +464,6 @@ class WikiqParser: next_batch = {} diff_dict = {} - if self.diff: - differ = pywikidiff2.pywikidiff2( - numContextLines=1000000, moved_paragraph_detection_cutoff=200000 - ) if self.persist != PersistMethod.none: window = deque(maxlen=PERSISTENCE_RADIUS) @@ -652,6 +648,11 @@ class WikiqParser: last_text = last_rev_text new_diffs = [] for text in row_buffer["text"]: + + differ = pywikidiff2.pywikidiff2( + numContextLines=1000000, moved_paragraph_detection_cutoff=200000 + ) + new_diffs.append(differ.inline_json_diff(last_text, text)) last_text = text row_buffer["diff"] = [