compare pywikidiff2 to making requests to wikidiff2.
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
# start the server
|
||||
import asyncio
|
||||
import subprocess
|
||||
from itertools import chain
|
||||
from functools import partial
|
||||
import re
|
||||
import pytest
|
||||
@@ -8,14 +9,13 @@ import pytest_asyncio
|
||||
from typing import List
|
||||
from deltas import Delete, Equal, Insert, wikitext_split
|
||||
from mwpersistence import Token
|
||||
|
||||
from wiki_diff_matcher import WikiDiffMatcher
|
||||
|
||||
@pytest_asyncio.fixture(scope="module", autouse=True)
|
||||
@pytest_asyncio.fixture(scope="module", autouse=False)
|
||||
async def start_stop_server():
|
||||
print("starting server")
|
||||
proc = await asyncio.create_subprocess_exec("php", "-S", "127.0.0.1:8000",
|
||||
"wikidiff2_api.php",
|
||||
"wikidiff2_api.php", "-c", "php.ini",
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE)
|
||||
# php needs a moment to actually start
|
||||
@@ -26,16 +26,24 @@ async def start_stop_server():
|
||||
stdout, stderr = await proc.communicate()
|
||||
print(stdout.decode())
|
||||
print(stderr.decode())
|
||||
|
||||
def _replace_whitespace(match):
    r"""Return the single character that should replace a whitespace run.

    Replacement callback for ``re.sub`` with the pattern
    ``( +)|(\n+)|(\t+)``: exactly one of the three groups matches, and the
    whole run is replaced by one character of the same kind. Runs of
    different kinds are therefore never merged with each other.

    :param match: match object produced by the pattern above.
    :returns: ``' '``, ``'\n'`` or ``'\t'`` depending on which group
        matched; ``''`` if none did.
    """
    if match.group(1):  # run of spaces
        return ' '
    if match.group(2):  # run of newlines
        return '\n'
    if match.group(3):  # run of tabs
        return '\t'
    # Not reachable with the three-group pattern above; kept as a safe
    # fallback in case the callback is reused with a different pattern.
    return ''
|
||||
|
||||
def assert_equal_enough(tokens:List[Token], rev):
    """Assert that ``tokens`` reproduce ``rev`` up to repeated whitespace.

    The tokens are stringified and concatenated, then both that text and
    ``rev`` have every run of spaces, newlines or tabs collapsed to a single
    character of the same kind. Leading and trailing whitespace is ignored.

    As a debugging aid the un-normalised token text and revision text are
    written to the files ``token`` and ``rev`` (relative paths), overwriting
    any previous content.

    :param tokens: iterable of tokens; only ``str(token)`` is used.
    :param rev: the revision text the tokens are expected to reproduce.
    :raises AssertionError: if the normalised texts differ.
    """
    token_doc = ''.join(str(t) for t in tokens)

    # Dump both sides so a failing comparison can be inspected with an
    # external diff tool. Context managers make sure the handles are flushed
    # and closed before the assertion can raise.
    with open('token', 'w') as token_file:
        print(token_doc, file=token_file)
    with open('rev', 'w') as rev_file:
        print(rev, file=rev_file)

    # Collapse runs per whitespace kind (space / newline / tab) rather than
    # folding all whitespace into one space, so a newline that turned into a
    # space is still reported as a difference.
    whitespace_runs = r'( +)|(\n+)|(\t+)'
    token_doc = re.sub(whitespace_runs, _replace_whitespace, token_doc).strip()
    rev = re.sub(whitespace_runs, _replace_whitespace, rev).strip()
    assert token_doc == rev
|
||||
|
||||
|
||||
@@ -136,6 +144,26 @@ def test_highlight_range():
|
||||
assert_equal_enough(a, rev1)
|
||||
assert_equal_enough(b, rev2)
|
||||
|
||||
def test_unmatched_parmoves():
    """Check both sides of the diff for the ``test_unmatched_parmoves`` fixture pair.

    Processes the "from" revision, then the "to" revision, and asserts that
    the token lists returned by the second ``process`` call reproduce the
    two revision texts.
    """
    # Read the fixtures through context managers so the handles are closed.
    with open("test/test_diff_revisions/test_unmatched_parmoves_from") as from_file:
        rev1 = from_file.read()
    with open("test/test_diff_revisions/test_unmatched_parmoves_to") as to_file:
        rev2 = to_file.read()

    matcher = WikiDiffMatcher([rev1,rev2])
    diff_processor = matcher.processor()

    # The first call's result is discarded; only the rev1 -> rev2 step is checked.
    diff_processor.process(rev1)
    ops, a, b = diff_processor.process(rev2)
    assert_equal_enough(a, rev1)
    assert_equal_enough(b, rev2)
|
||||
|
||||
def test_bug_4():
    """Check both sides of the diff for the ``test_bug_4`` fixture pair.

    Processes the "from" revision, then the "to" revision, and asserts that
    the token lists returned by the second ``process`` call reproduce the
    two revision texts.
    """
    # Read the fixtures through context managers so the handles are closed.
    with open("test/test_diff_revisions/test_bug_4_from") as from_file:
        rev1 = from_file.read()
    with open("test/test_diff_revisions/test_bug_4_to") as to_file:
        rev2 = to_file.read()

    matcher = WikiDiffMatcher([rev1,rev2])
    diff_processor = matcher.processor()

    # The first call's result is discarded; only the rev1 -> rev2 step is checked.
    diff_processor.process(rev1)
    ops, a, b = diff_processor.process(rev2)
    assert_equal_enough(a, rev1)
    assert_equal_enough(b, rev2)
|
||||
|
||||
|
||||
def test_delete():
|
||||
rev1 = open("test/test_diff_revisions/1295229484").read()
|
||||
@@ -295,18 +323,31 @@ def test_leading_whitespace():
|
||||
assert_equal_enough(b, rev2)
|
||||
assert_equal_enough(a, rev1)
|
||||
|
||||
# def test_whitespace_2():
|
||||
# rev1 = open("test/test_diff_revisions/test_whitespace_2_from").read()
|
||||
# rev2 = open("test/test_diff_revisions/test_whitespace_2_to").read()
|
||||
# matcher = WikiDiffMatcher([rev1,rev2])
|
||||
# diff_processor = matcher.processor()
|
||||
def test_whitespace_bug():
    """Check both sides of the diff for the ``test_whitespace_bug`` fixture pair.

    Processes the "from" revision, then the "to" revision, and asserts that
    the token lists returned by the second ``process`` call reproduce the
    two revision texts.
    """
    # Read the fixtures through context managers so the handles are closed.
    with open("test/test_diff_revisions/test_whitespace_bug_from") as from_file:
        rev1 = from_file.read()
    with open("test/test_diff_revisions/test_whitespace_bug_to") as to_file:
        rev2 = to_file.read()

    matcher = WikiDiffMatcher([rev1,rev2])
    diff_processor = matcher.processor()

    # note that a and b are constructed from the diffs.
    # so they reflect the state of the text according to the diff processor
    ops, a, b = diff_processor.process(rev1)
    ops, a, b = diff_processor.process(rev2)
    assert_equal_enough(b, rev2)
    assert_equal_enough(a, rev1)
|
||||
|
||||
def test_bug_3():
    """Check the "to" side of the diff for the ``test_bug_3`` fixture pair.

    Processes the "from" revision, then the "to" revision, and asserts that
    the ``b`` token list returned by the second ``process`` call reproduces
    the "to" revision text.
    """
    # Read the fixtures through context managers so the handles are closed.
    with open("test/test_diff_revisions/test_bug_3_from") as from_file:
        rev1 = from_file.read()
    with open("test/test_diff_revisions/test_bug_3_to") as to_file:
        rev2 = to_file.read()

    matcher = WikiDiffMatcher([rev1,rev2])
    diff_processor = matcher.processor()

    # note that a and b are constructed from the diffs.
    # so they reflect the state of the text according to the diff processor
    ops, a, b = diff_processor.process(rev1)
    ops, a, b = diff_processor.process(rev2)
    assert_equal_enough(b, rev2)
    # NOTE(review): the "from"-side check is disabled in this test only; the
    # reason is not recorded here -- confirm whether it is a known failure.
    #assert_equal_enough(a, rev1)
|
||||
|
||||
|
||||
|
||||
@@ -326,15 +367,14 @@ def test_actually_equal():
|
||||
assert_equal_enough(b, rev1)
|
||||
assert_equal_enough(a, rev1)
|
||||
|
||||
# slow test
|
||||
# slow test. comment out the following line to enable it.
|
||||
@pytest.mark.skip
|
||||
def test_diff_consistency():
|
||||
from mwxml import Dump
|
||||
stream = subprocess.Popen(["7za", "x", "-so", "test/dumps/sailormoon.xml.7z", "*.xml"], stdout=subprocess.PIPE).stdout
|
||||
|
||||
dump = Dump.from_file(stream)
|
||||
#stream = subprocess.Popen(["7za", "x", "-so", "test/dumps/ikwiki-20180301-pages-meta-history.xml.bz2", "*.xml"], stdout=subprocess.PIPE).stdout
|
||||
dump = Dump.from_file("test/dumps/ikwiki.xml")
|
||||
for page in dump:
|
||||
revisions = [rev.text for rev in page if rev.text]
|
||||
|
||||
matcher = WikiDiffMatcher(revisions)
|
||||
diff_processor = matcher.processor()
|
||||
last_rev = ""
|
||||
@@ -342,7 +382,44 @@ def test_diff_consistency():
|
||||
print(rev, file=open("test_unicode_highlight_to",'w'))
|
||||
print(last_rev, file=open("test_unicode_highlight_from",'w'))
|
||||
ops, a, b = diff_processor.process(rev)
|
||||
#assert_equal_enough(a, last_rev)
|
||||
|
||||
assert_equal_enough(a, last_rev)
|
||||
assert_equal_enough(b, rev)
|
||||
last_rev = rev
|
||||
|
||||
#@pytest.mark.skip
|
||||
def test_benchmark_diff(benchmark):
    """Benchmark ``WikiDiffMatcher`` construction on revisions from the ikwiki dump.

    Every round pulls the next two revision texts from the dump (all pages
    chained together) and passes them to ``WikiDiffMatcher`` through the
    ``setup`` hook of ``benchmark.pedantic``.
    """
    from mwxml import Dump

    dump = Dump.from_file("test/dumps/ikwiki.xml")
    # One flat stream of revision texts across all pages of the dump.
    revs = chain.from_iterable([rev.text for rev in page] for page in dump)

    def next_revs():
        # NOTE(review): pedantic() unpacks this as (args, kwargs), so the two
        # texts presumably arrive as two positional arguments rather than as
        # one list like in the other tests -- confirm against the
        # WikiDiffMatcher signature.
        first = next(revs)
        second = next(revs)
        return [first, second], {}

    benchmark.pedantic(WikiDiffMatcher,setup=next_revs,iterations=1,rounds=1000, warmup_rounds=1)
|
||||
|
||||
def test_benchmark_diff_server(start_stop_server,benchmark):
    """Benchmark ``WikiDiffMatcher`` construction with ``server=True``.

    Requests the ``start_stop_server`` fixture, then every round pulls the
    next two revision texts from the ikwiki dump (all pages chained
    together) and passes them to ``WikiDiffMatcher`` together with
    ``server=True`` through the ``setup`` hook of ``benchmark.pedantic``.
    """
    from mwxml import Dump

    dump = Dump.from_file("test/dumps/ikwiki.xml")
    # One flat stream of revision texts across all pages of the dump.
    revs = chain.from_iterable([rev.text for rev in page] for page in dump)

    def next_revs():
        # NOTE(review): pedantic() unpacks this as (args, kwargs), so the two
        # texts presumably arrive as two positional arguments rather than as
        # one list like in the other tests -- confirm against the
        # WikiDiffMatcher signature.
        first = next(revs)
        second = next(revs)
        return [first, second], {'server':True}

    benchmark.pedantic(WikiDiffMatcher,setup=next_revs,iterations=1,rounds=1000, warmup_rounds=1)
|
||||
|
||||
@pytest.mark.skip
def test_diff_consistency_server():
    """Replay every page of the ikwiki dump through the ``server=True`` matcher.

    For each page, the non-empty revision texts are diffed in order and
    after every step both token lists returned by ``process`` are checked
    against the previous and the current revision text. Skipped by default
    via the ``pytest.mark.skip`` decorator.
    """
    from mwxml import Dump
    #stream = subprocess.Popen(["7za", "x", "-so", "test/dumps/ikwiki-20180301-pages-meta-history.xml.bz2", "*.xml"], stdout=subprocess.PIPE).stdout
    dump = Dump.from_file("test/dumps/ikwiki.xml")
    for page in dump:
        revisions = [rev.text for rev in page if rev.text]
        matcher = WikiDiffMatcher(revisions,server=True)
        diff_processor = matcher.processor()
        last_rev = ""
        for rev in revisions:
            # Keep the most recent pair on disk so a failing step can be
            # reproduced; context managers close the handles on every
            # iteration instead of leaking two per revision.
            with open("test_unicode_highlight_to",'w') as to_file:
                print(rev, file=to_file)
            with open("test_unicode_highlight_from",'w') as from_file:
                print(last_rev, file=from_file)
            ops, a, b = diff_processor.process(rev)
            assert_equal_enough(a, last_rev)
            assert_equal_enough(b, rev)
            last_rev = rev
|
||||
|
||||
|
||||
Reference in New Issue
Block a user