add test.

2025-06-30 15:45:56 -07:00
parent bc7f186112
commit 37734ed092
7 changed files with 962 additions and 0 deletions
--- a/test/test_wiki_diff_matcher.py
+++ b/test/test_wiki_diff_matcher.py
@@ -0,0 +1,172 @@
+# start the server
+import asyncio
+import subprocess
+from functools import partial
+
+import pytest
+import pytest_asyncio
+from typing import List
+from deltas import Delete, Equal, Insert, wikitext_split
+from mwpersistence import Token
+from wiki_diff_matcher import WikiDiffMatcher
+
+
+@pytest_asyncio.fixture(scope="module")
+async def start_stop_server():
+    """Run the wikidiff2 PHP API server for the duration of the test module.
+
+    Launches PHP's built-in web server on 127.0.0.1:8000 with
+    ``wikidiff2_api.php`` as the router script, yields the process handle,
+    and on teardown stops the server and prints its captured output.
+    """
+    proc = await asyncio.create_subprocess_exec("php", "-S", "127.0.0.1:8000",
+                                                "wikidiff2_api.php",
+                                                stdout=subprocess.PIPE,
+                                                stderr=subprocess.PIPE)
+    yield proc
+    # The server does not exit on its own, so it has to be terminated *before*
+    # communicate(); communicate() waits for the process to finish and would
+    # otherwise block the teardown forever.
+    if proc.returncode is None:
+        proc.terminate()
+    stdout, stderr = await proc.communicate()
+    # communicate() returns bytes, which must be decoded (not encoded).
+    print(stdout.decode(errors="replace"))
+    print(stderr.decode(errors="replace"))
+
+
+def assert_equal_enough(tokens:List[Token], rev):
+    """Assert that the concatenated tokens reproduce the revision text.
+
+    Newlines are removed from ``rev`` before comparing (the tokens exclude
+    them), and leading/trailing whitespace is ignored on both sides.
+    """
+    rebuilt = ''.join(map(str, tokens))
+    flattened = rev.replace('\n','')
+    assert rebuilt.strip() == flattened.strip()
+
+
+def assert_correct_equal_section(ops, expected_equal_lines, expected_equal_tokens):
+    """Check the run of leading ``Equal`` operations at the start of ``ops``.
+
+    Counts the consecutive ``Equal`` ops from the front of ``ops`` and sums
+    the ``b`` ranges they cover, then asserts that the count matches
+    ``expected_equal_lines`` and that the covered range is gap-free and
+    spans ``expected_equal_tokens``.
+
+    Returns the ``b2`` of the last leading ``Equal`` op (or the clamped
+    starting offset when there is none).
+    """
+    start = ops[0].b1
+    cursor = max(start, 0)
+    equal_count = 0
+    covered_tokens = 0
+    boundary = None
+    for op in ops:
+        if isinstance(op, Equal):
+            equal_count += 1
+            covered_tokens += op.b2 - cursor
+            cursor = op.b2
+            continue
+        # first non-equal op: remember where it starts and stop scanning
+        boundary = op.b1 if isinstance(op, Insert) else op.a1
+        break
+
+    # NOTE: ``op`` here is whatever the loop stopped on (the first non-Equal
+    # op, or the last op when every op is Equal).
+    if expected_equal_lines == 1:
+        boundary = op.b2 + 1
+
+    # every op was an Equal, so the section ends with the last op
+    if boundary is None:
+        boundary = ops[-1].b2
+
+    assert equal_count == expected_equal_lines
+    # check that there are no gaps and the number is as expected
+    assert covered_tokens == cursor - start == boundary - start == expected_equal_tokens
+    return cursor
+
+def test_equality():
+    """A revision followed by the same text plus a space diffs as all-equal."""
+    # use a context manager so the file handle is closed deterministically
+    with open("test/test_diff_revisions/1285792388") as f:
+        rev1 = f.read()
+    # whitespace is added because exact identity reverts do not result in diffs.
+    matcher = WikiDiffMatcher([rev1,rev1 + " "])
+    diff_processor = matcher.processor()
+    # the result for the first revision is not inspected by this test
+    diff_processor.process(rev1)
+    ops, a, b = diff_processor.process(rev1 + " ")
+    assert len(ops) == 129
+    for op in ops[:-1]:
+        assert isinstance(op, Equal)
+
+    # note that the whitespace token does not result in a token according to wikitext_split
+    # compare the tokens based on the diffs to the baseline
+    # whitespace differences are allowed
+    assert_equal_enough(b, rev1)
+
+def test_highlight_range():
+    """Both sides of a range-edit diff reproduce their source revisions."""
+    # use context managers so the file handles are closed deterministically
+    with open("test/test_diff_revisions/1295229484_rangeedit0") as f:
+        rev1 = f.read()
+    with open("test/test_diff_revisions/1295229484_rangeedit1") as f:
+        rev2 = f.read()
+    matcher = WikiDiffMatcher([rev1,rev2])
+    diff_processor = matcher.processor()
+    diff_processor.process(rev1)
+    ops, a, b = diff_processor.process(rev2)
+    assert_equal_enough(a, rev1)
+    assert_equal_enough(b, rev2)
+
+
+def test_delete():
+    """A deletion yields equal ops, then Delete ops, then equal ops again.
+
+    Checks the token reconstruction of both revisions, the size of the
+    leading equal section, the number and extent of the Delete ops, and the
+    size of the trailing equal section.
+    """
+    # use context managers so the file handles are closed deterministically
+    with open("test/test_diff_revisions/1295229484") as f:
+        rev1 = f.read()
+    with open("test/test_diff_revisions/1295229484_delete") as f:
+        rev2 = f.read()
+
+    matcher = WikiDiffMatcher([rev1,rev2])
+    diff_processor = matcher.processor()
+    diff_processor.process(rev1)
+    ops, a, b = diff_processor.process(rev2)
+    assert_equal_enough(b, rev2)
+    assert_equal_enough(a, rev1)
+
+    initial_equal_lines = 2
+    initial_equal_tokens = 12
+    last_a2 = assert_correct_equal_section(ops,
+                                           expected_equal_lines=initial_equal_lines,
+                                           expected_equal_tokens=initial_equal_tokens)
+
+    # walk the Delete ops that follow the leading equal section, tracking
+    # how many tokens of the old revision they cover
+    first_nondelete_token = None
+    n_deletes = 0
+    n_deleted_tokens = 0
+    for op in ops[initial_equal_lines:]:
+        if not isinstance(op, Delete):
+            first_nondelete_token = op.a1
+            break
+        n_deletes += 1
+        n_deleted_tokens += op.a2 - last_a2
+        last_a2 = op.a2
+
+    assert n_deletes == 2
+    # fail with a clear message instead of a TypeError on ``None - int``
+    assert first_nondelete_token is not None, "expected an op after the deletes"
+    assert n_deleted_tokens == last_a2 - initial_equal_tokens == first_nondelete_token - initial_equal_tokens == 316
+
+    assert_correct_equal_section(ops[initial_equal_lines + n_deletes:],
+                                 expected_equal_lines=126,
+                                 expected_equal_tokens=9323)
+
+
+
+
+# First, let's test that we properly build the operations.
+# Then we can test whether the state seems to work as intended.
+def test_addition():
+    """An addition yields a leading equal section followed by Insert ops.
+
+    The first revision must diff as pure inserts; the second must keep the
+    expected number of equal lines/tokens and then add the expected number
+    of ops and tokens.
+    """
+    # use context managers so the file handles are closed deterministically
+    with open("test/test_diff_revisions/1285792388") as f:
+        rev1 = f.read()
+    with open("test/test_diff_revisions/1295229484") as f:
+        rev2 = f.read()
+    matcher = WikiDiffMatcher([rev1,rev2])
+    diff_processor = matcher.processor()
+
+    # note that a and b are constructed from the diffs.
+    # so they reflect the state of the text according to the diff processor
+    ops, a, b = diff_processor.process(rev1)
+
+    for op in ops:
+        assert isinstance(op, Insert)
+
+    assert_equal_enough(b, rev1)
+
+    diff_processor.previous_text = rev1
+
+    ops, a, b = diff_processor.process(rev2)
+    assert_equal_enough(a, rev1)
+    assert_equal_enough(b, rev2)
+    ops = list(ops)
+    initial_equal_lines = 128
+    initial_equal_tokens = 9359
+    assert_correct_equal_section(ops,
+                                 expected_equal_lines=initial_equal_lines,
+                                 expected_equal_tokens=initial_equal_tokens)
+
+    # count the ops after the equal section and the tokens they add,
+    # starting from the end of the equal section
+    n_inserts = 0
+    n_inserted_tokens = 0
+    last_b2 = initial_equal_tokens
+    for op in ops[initial_equal_lines:]:
+        n_inserts += 1
+        n_inserted_tokens += op.b2 - last_b2
+        last_b2 = op.b2
+
+    assert n_inserted_tokens == last_b2 - initial_equal_tokens == 292
+    assert n_inserts == 2
+