"""Tests for WikiDiffMatcher.

Starts a local PHP wikidiff2 API server (module-scoped fixture) and verifies
that the diff operations (Equal / Insert / Delete) produced by
``WikiDiffMatcher`` correctly reconstruct revision texts stored under
``test/test_diff_revisions/``.
"""
import asyncio
import subprocess
from functools import partial
from typing import List

import pytest
import pytest_asyncio
from deltas import Delete, Equal, Insert, wikitext_split
from mwpersistence import Token

from wiki_diff_matcher import WikiDiffMatcher


@pytest_asyncio.fixture(scope="module")
async def start_stop_server():
    """Run the wikidiff2 PHP API server for the duration of the test module.

    Yields the server process; on teardown, terminates it and prints its
    captured output for debugging.
    """
    proc = await asyncio.create_subprocess_exec(
        "php",
        "-S",
        "127.0.0.1:8000",
        "wikidiff2_api.php",
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    yield proc
    # Terminate BEFORE communicate(): the PHP dev server never exits on its
    # own, so awaiting communicate() first would block forever.
    proc.terminate()
    stdout, stderr = await proc.communicate()
    # communicate() returns bytes; decode (not encode) for readable output.
    print(stdout.decode())
    print(stderr.decode())


def assert_equal_enough(tokens: List[Token], rev: str) -> None:
    """Assert that the concatenated tokens match the revision text.

    The tokens exclude newlines, and extra whitespace at the beginning or
    end is allowed, so both sides are normalized before comparison.
    """
    assert ''.join([str(t) for t in tokens]).strip() == rev.replace('\n', '').strip()


def assert_correct_equal_section(ops, expected_equal_lines, expected_equal_tokens):
    """Check the run of Equal ops at the start of ``ops``.

    Verifies that ``ops`` begins with exactly ``expected_equal_lines`` Equal
    operations covering exactly ``expected_equal_tokens`` tokens with no gaps.

    Returns the token offset (``b2``) at the end of the equal section.
    """
    n_equal_lines = 0
    last_b2 = max(ops[0].b1, 0)
    initial_equal_tokens = 0
    first_unequal_token = None
    for op in ops:
        if not isinstance(op, Equal):
            # First non-equal op marks the end of the equal section; its
            # start offset lives in b1 for Insert, a1 for Delete.
            if isinstance(op, Insert):
                first_unequal_token = op.b1
            else:
                first_unequal_token = op.a1
            break
        n_equal_lines += 1
        initial_equal_tokens += op.b2 - last_b2
        last_b2 = op.b2
        # NOTE(review): special-casing for a single expected equal line;
        # no current caller passes expected_equal_lines == 1, so this path
        # is unexercised — confirm intent if it becomes reachable.
        if expected_equal_lines == 1:
            first_unequal_token = op.b2 + 1
    # if the last line is an equal (loop ran to completion without a break)
    if first_unequal_token is None:
        first_unequal_token = ops[-1].b2
    assert n_equal_lines == expected_equal_lines
    # check that there are no gaps and the number is as expected
    assert initial_equal_tokens == last_b2 - ops[0].b1 == first_unequal_token - ops[0].b1 == expected_equal_tokens
    return last_b2


def test_equality():
    rev1 = open("test/test_diff_revisions/1285792388").read()
    # whitespace is added because exact identity reverts do not result in diffs.
    matcher = WikiDiffMatcher([rev1, rev1 + " "])
    diff_processor = matcher.processor()
    ops, a, b = diff_processor.process(rev1)
    ops, a, b = diff_processor.process(rev1 + " ")
    assert len(ops) == 129
    # note that the whitespace token does not result in a token according
    # to wikitext_split, so all ops but the last are Equal.
    for op in ops[:-1]:
        assert isinstance(op, Equal)
    # compare the tokens based on the diffs to the baseline
    # whitespace differences are allowed
    assert_equal_enough(b, rev1)


def test_highlight_range():
    rev1 = open("test/test_diff_revisions/1295229484_rangeedit0").read()
    rev2 = open("test/test_diff_revisions/1295229484_rangeedit1").read()
    matcher = WikiDiffMatcher([rev1, rev2])
    diff_processor = matcher.processor()
    diff_processor.process(rev1)
    ops, a, b = diff_processor.process(rev2)
    assert_equal_enough(a, rev1)
    assert_equal_enough(b, rev2)


def test_delete():
    rev1 = open("test/test_diff_revisions/1295229484").read()
    rev2 = open("test/test_diff_revisions/1295229484_delete").read()
    matcher = WikiDiffMatcher([rev1, rev2])
    diff_processor = matcher.processor()
    diff_processor.process(rev1)
    ops, a, b = diff_processor.process(rev2)
    assert_equal_enough(b, rev2)
    assert_equal_enough(a, rev1)
    # The diff starts with an equal section, then a run of deletes.
    initial_equal_lines = 2
    initial_equal_tokens = 12
    last_b2 = assert_correct_equal_section(
        ops,
        expected_equal_lines=initial_equal_lines,
        expected_equal_tokens=initial_equal_tokens,
    )
    first_nondelete_token = None
    n_deletes = 0
    n_deleted_tokens = 0
    for op in ops[initial_equal_lines:]:
        if not isinstance(op, Delete):
            first_nondelete_token = op.a1
            break
        n_deletes += 1
        n_deleted_tokens += op.a2 - last_b2
        last_b2 = op.a2
    assert n_deletes == 2
    assert n_deleted_tokens == last_b2 - initial_equal_tokens == first_nondelete_token - initial_equal_tokens == 316
    # The rest of the diff after the deletes is one long equal section.
    last_b2 = assert_correct_equal_section(
        ops[initial_equal_lines + n_deletes:],
        expected_equal_lines=126,
        expected_equal_tokens=9323,
    )


# first lets test that we properly build the operations.
# then we can test if the state seems to work as intended.
def test_addition():
    rev1 = open("test/test_diff_revisions/1285792388").read()
    rev2 = open("test/test_diff_revisions/1295229484").read()
    matcher = WikiDiffMatcher([rev1, rev2])
    diff_processor = matcher.processor()
    # note that a and b are constructed from the diffs.
    # so they reflect the state of the text according to the diff processor
    ops, a, b = diff_processor.process(rev1)
    # the first revision diffs against nothing, so every op is an Insert
    for op in ops:
        assert isinstance(op, Insert)
    assert_equal_enough(b, rev1)
    diff_processor.previous_text = rev1
    ops, a, b = diff_processor.process(rev2)
    assert_equal_enough(a, rev1)
    assert_equal_enough(b, rev2)
    ops = list(ops)
    initial_equal_lines = 128
    initial_equal_tokens = 9359
    last_b2 = assert_correct_equal_section(
        ops,
        expected_equal_lines=initial_equal_lines,
        expected_equal_tokens=initial_equal_tokens,
    )
    n_inserts = 0
    n_inserted_tokens = 0
    last_b2 = initial_equal_tokens
    for op in ops[initial_equal_lines:]:
        n_inserts += 1
        n_inserted_tokens += op.b2 - last_b2
        last_b2 = op.b2
    assert n_inserted_tokens == last_b2 - initial_equal_tokens == 292
    assert n_inserts == 2