add test.
This commit is contained in:
172
test/test_wiki_diff_matcher.py
Normal file
172
test/test_wiki_diff_matcher.py
Normal file
@@ -0,0 +1,172 @@
|
||||
# start the server
|
||||
import asyncio
|
||||
import subprocess
|
||||
from functools import partial
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
from typing import List
|
||||
from deltas import Delete, Equal, Insert, wikitext_split
|
||||
from mwpersistence import Token
|
||||
from wiki_diff_matcher import WikiDiffMatcher
|
||||
|
||||
|
||||
@pytest_asyncio.fixture(scope="module")
async def start_stop_server():
    """Run PHP's built-in web server for ``wikidiff2_api.php`` on 127.0.0.1:8000.

    Yields the subprocess handle.  On teardown the server is stopped and
    whatever it wrote to stdout/stderr is printed.

    NOTE(review): no test visible in this module requests this fixture and it
    is not ``autouse`` -- confirm how the server is expected to get started.
    """
    proc = await asyncio.create_subprocess_exec(
        "php", "-S", "127.0.0.1:8000", "wikidiff2_api.php",
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    yield proc
    # The server does not exit by itself, so it must be terminated *before*
    # communicate(); otherwise communicate() would wait forever for EOF.
    # returncode is None only while the process is still running.
    if proc.returncode is None:
        proc.terminate()
    stdout, stderr = await proc.communicate()
    # communicate() returns bytes: they can be decoded, not encoded.
    print(stdout.decode(errors="replace"))
    print(stderr.decode(errors="replace"))
|
||||
|
||||
|
||||
def assert_equal_enough(tokens: List[Token], rev):
    """Assert that the concatenated ``tokens`` reproduce the text ``rev``.

    Newlines are removed from ``rev`` before comparing (the tokens exclude
    them), and leading/trailing whitespace is ignored on both sides.
    """
    rebuilt = ''.join(map(str, tokens)).strip()
    expected = rev.replace('\n', '').strip()
    assert rebuilt == expected
|
||||
|
||||
|
||||
def assert_correct_equal_section(ops, expected_equal_lines, expected_equal_tokens):
    """Check the run of ``Equal`` operations at the start of ``ops``.

    Walks ``ops`` up to the first operation that is not an ``Equal``, counting
    the ``Equal`` operations seen and summing how far each one advances ``b2``.
    Asserts that the count is ``expected_equal_lines`` and that the summed
    amount, the span from ``ops[0].b1`` to the last ``b2`` seen, and the span
    from ``ops[0].b1`` to the first non-equal position all equal
    ``expected_equal_tokens``.

    Returns the running end position: the ``b2`` of the last ``Equal`` seen, or
    ``max(ops[0].b1, 0)`` when there was none.
    """
    start = ops[0].b1
    run_end = max(start, 0)
    equal_ops_seen = 0
    tokens_seen = 0
    boundary = None
    for op in ops:
        if isinstance(op, Equal):
            equal_ops_seen += 1
            tokens_seen += op.b2 - run_end
            run_end = op.b2
            continue
        # The run is over: an Insert is located by b1, anything else by a1.
        boundary = op.b1 if isinstance(op, Insert) else op.a1
        break

    # ``op`` is still bound to the operation the loop stopped on.
    if expected_equal_lines == 1:
        boundary = op.b2 + 1

    # every operation was an Equal, so the section ends with the last one
    if boundary is None:
        boundary = ops[-1].b2

    assert equal_ops_seen == expected_equal_lines
    # there must be no gaps and the amount must be as expected
    assert tokens_seen == run_end - start == boundary - start == expected_equal_tokens
    return run_end
|
||||
|
||||
def test_equality():
    """Diff a revision against itself plus one trailing space.

    Expects 129 operations, all of them ``Equal`` except possibly the last,
    and that the ``b`` tokens rebuild the original revision text.
    """
    # Context manager so the fixture file is closed deterministically.
    with open("test/test_diff_revisions/1285792388") as f:
        rev1 = f.read()
    # whitespace is added because exact identity reverts do not result in diffs.
    matcher = WikiDiffMatcher([rev1, rev1 + " "])
    diff_processor = matcher.processor()
    # The result for the first revision is not inspected here.
    diff_processor.process(rev1)
    ops, _a, b = diff_processor.process(rev1 + " ")
    assert len(ops) == 129
    for op in ops[:-1]:
        assert isinstance(op, Equal)

    # note that the whitespace token does not result in a token according to wikitext_split
    # compare the tokens based on the diffs to the baseline
    # whitespace differences are allowed
    assert_equal_enough(b, rev1)
|
||||
|
||||
def test_highlight_range():
    """Diff the two ``rangeedit`` fixture revisions.

    Checks that the ``a`` and ``b`` token lists returned for the second
    revision rebuild the first and second revision texts respectively.
    """
    # Context managers so the fixture files are closed deterministically.
    with open("test/test_diff_revisions/1295229484_rangeedit0") as f:
        rev1 = f.read()
    with open("test/test_diff_revisions/1295229484_rangeedit1") as f:
        rev2 = f.read()
    matcher = WikiDiffMatcher([rev1, rev2])
    diff_processor = matcher.processor()
    diff_processor.process(rev1)
    # Only the token lists are checked; the operations are not inspected.
    _ops, a, b = diff_processor.process(rev2)
    assert_equal_enough(a, rev1)
    assert_equal_enough(b, rev2)
|
||||
|
||||
|
||||
def test_delete():
    """Diff a revision against the ``_delete`` variant and check the op layout.

    Expects 2 leading ``Equal`` operations covering 12 tokens, followed by
    2 ``Delete`` operations covering 316 tokens, followed by 126 ``Equal``
    operations covering 9323 tokens.
    """
    # Context managers so the fixture files are closed deterministically.
    with open("test/test_diff_revisions/1295229484") as f:
        rev1 = f.read()
    with open("test/test_diff_revisions/1295229484_delete") as f:
        rev2 = f.read()

    matcher = WikiDiffMatcher([rev1, rev2])
    diff_processor = matcher.processor()
    diff_processor.process(rev1)
    ops, a, b = diff_processor.process(rev2)
    assert_equal_enough(b, rev2)
    assert_equal_enough(a, rev1)

    initial_equal_lines = 2
    initial_equal_tokens = 12
    last_end = assert_correct_equal_section(ops,
                                            expected_equal_lines=initial_equal_lines,
                                            expected_equal_tokens=initial_equal_tokens)

    # Walk the deletes that follow the leading equal section; they are
    # measured on the "a" side, continuing from where the equal section ended.
    first_nondelete_token = None
    n_deletes = 0
    n_deleted_tokens = 0
    for op in ops[initial_equal_lines:]:
        if not isinstance(op, Delete):
            first_nondelete_token = op.a1
            break
        n_deletes += 1
        n_deleted_tokens += op.a2 - last_end
        last_end = op.a2

    assert n_deletes == 2
    assert n_deleted_tokens == last_end - initial_equal_tokens == first_nondelete_token - initial_equal_tokens == 316

    # Everything after the deletes should be one gap-free equal section.
    assert_correct_equal_section(ops[initial_equal_lines + n_deletes:],
                                 expected_equal_lines=126,
                                 expected_equal_tokens=9323)
|
||||
|
||||
|
||||
|
||||
|
||||
# First, let's test that we properly build the operations.
|
||||
# Then we can test whether the state works as intended.
|
||||
def test_addition():
    """Diff a revision against a later one with text added and check the ops.

    The first revision must come out as inserts only.  The second must start
    with 128 ``Equal`` operations covering 9359 tokens, followed by
    2 operations covering 292 tokens.
    """
    # Context managers so the fixture files are closed deterministically.
    with open("test/test_diff_revisions/1285792388") as f:
        rev1 = f.read()
    with open("test/test_diff_revisions/1295229484") as f:
        rev2 = f.read()
    matcher = WikiDiffMatcher([rev1, rev2])
    diff_processor = matcher.processor()

    # note that a and b are constructed from the diffs.
    # so they reflect the state of the text according to the diff processor
    ops, a, b = diff_processor.process(rev1)

    for op in ops:
        assert isinstance(op, Insert)

    assert_equal_enough(b, rev1)

    diff_processor.previous_text = rev1

    ops, a, b = diff_processor.process(rev2)
    assert_equal_enough(a, rev1)
    assert_equal_enough(b, rev2)
    ops = list(ops)
    initial_equal_lines = 128
    initial_equal_tokens = 9359
    assert_correct_equal_section(ops,
                                 expected_equal_lines=initial_equal_lines,
                                 expected_equal_tokens=initial_equal_tokens)

    # NOTE(review): the operations after the equal section are counted as
    # inserts without checking their type -- consider asserting
    # isinstance(op, Insert) here.
    n_inserts = 0
    n_inserted_tokens = 0
    last_b2 = initial_equal_tokens
    for op in ops[initial_equal_lines:]:
        n_inserts += 1
        n_inserted_tokens += op.b2 - last_b2
        last_b2 = op.b2

    assert n_inserted_tokens == last_b2 - initial_equal_tokens == 292
    assert n_inserts == 2
|
||||
|
||||
Reference in New Issue
Block a user