From cc96bb5f3fd9280a0551afd46edc511d8b1f6400 Mon Sep 17 00:00:00 2001 From: Nathan TeBlunthuis Date: Mon, 7 Jul 2025 11:21:16 -0700 Subject: [PATCH] remove server. --- test/test_wiki_diff_matcher.py | 51 ++-------------------------------- wiki_diff_matcher.py | 48 ++++---------------------------- wikidiff2_api.php | 28 ------------------- 3 files changed, 7 insertions(+), 120 deletions(-) delete mode 100644 wikidiff2_api.php diff --git a/test/test_wiki_diff_matcher.py b/test/test_wiki_diff_matcher.py index 0592e12..fee41f3 100644 --- a/test/test_wiki_diff_matcher.py +++ b/test/test_wiki_diff_matcher.py @@ -1,6 +1,3 @@ -# start the server -import asyncio -import subprocess from itertools import chain from functools import partial import re @@ -11,22 +8,6 @@ from deltas import Delete, Equal, Insert, wikitext_split from mwpersistence import Token from wiki_diff_matcher import WikiDiffMatcher -@pytest_asyncio.fixture(scope="module", autouse=False) -async def start_stop_server(): - print("starting server") - proc = await asyncio.create_subprocess_exec("php", "-S", "127.0.0.1:8000", - "wikidiff2_api.php", "-c", "php.ini", - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - # php needs a moment to actually start - await asyncio.sleep(0.1) - yield proc - print("stopping server") - proc.terminate() - stdout, stderr = await proc.communicate() - print(stdout.decode()) - print(stderr.decode()) - def _replace_whitespace(match): if match.group(1): # If spaces matched (e.g., ' ') return ' ' @@ -371,7 +352,6 @@ def test_actually_equal(): @pytest.mark.skip def test_diff_consistency(): from mwxml import Dump - #stream = subprocess.Popen(["7za", "x", "-so", "test/dumps/ikwiki-20180301-pages-meta-history.xml.bz2", "*.xml"], stdout=subprocess.PIPE).stdout dump = Dump.from_file("test/dumps/ikwiki.xml") for page in dump: revisions = [rev.text for rev in page if rev.text] @@ -386,7 +366,7 @@ def test_diff_consistency(): assert_equal_enough(b, rev) last_rev = rev -#@pytest.mark.skip +@pytest.mark.skip def test_benchmark_diff(benchmark): from mwxml import Dump dump = Dump.from_file("test/dumps/ikwiki.xml") @@ -394,32 +374,5 @@ def test_benchmark_diff(benchmark): def next_revs(): return [next(revs), next(revs)], {} - benchmark.pedantic(WikiDiffMatcher,setup=next_revs,iterations=1,rounds=1000, warmup_rounds=1) - -def test_benchmark_diff_server(start_stop_server,benchmark): - from mwxml import Dump - dump = Dump.from_file("test/dumps/ikwiki.xml") - revs = chain.from_iterable([rev.text for rev in page] for page in dump) - def next_revs(): - return [next(revs), next(revs)], {'server':True} - - benchmark.pedantic(WikiDiffMatcher,setup=next_revs,iterations=1,rounds=1000, warmup_rounds=1) - -@pytest.mark.skip -def test_diff_consistency_server(): - from mwxml import Dump - #stream = subprocess.Popen(["7za", "x", "-so", "test/dumps/ikwiki-20180301-pages-meta-history.xml.bz2", "*.xml"], stdout=subprocess.PIPE).stdout - dump = Dump.from_file("test/dumps/ikwiki.xml") - for page in dump: - revisions = [rev.text for rev in page if rev.text] - matcher = WikiDiffMatcher(revisions,server=True) - diff_processor = matcher.processor() - last_rev = "" - for rev in revisions: - print(rev, file=open("test_unicode_highlight_to",'w')) - print(last_rev, file=open("test_unicode_highlight_from",'w')) - ops, a, b = diff_processor.process(rev) - assert_equal_enough(a, last_rev) - assert_equal_enough(b, rev) - last_rev = rev + benchmark.pedantic(WikiDiffMatcher, setup=next_revs, iterations=1,rounds=1000, warmup_rounds=1) diff --git a/wiki_diff_matcher.py b/wiki_diff_matcher.py index 1ab5935..c19d7a8 100644 --- a/wiki_diff_matcher.py +++ b/wiki_diff_matcher.py @@ -4,48 +4,12 @@ from collections import namedtuple from itertools import chain from typing import Dict, Generator, List, Optional, Tuple -import requests from deltas import (Delete, DiffEngine, Equal, Insert, Operation, RegexTokenizer, Token, tokenizers) from sortedcontainers import SortedDict TOKENIZER = tokenizers.wikitext_split -import pywikidiff2 -differ = pywikidiff2.pywikidiff2(numContextLines=1000000, - moved_paragraph_detection_cutoff=200000) - -def compute_diffs_server(texts, url="http://127.0.0.1:8000"): - response = None - try: - response = requests.post(url, json=texts) - response.raise_for_status() - incremental_diffs = response.json() - except requests.exceptions.ConnectionError as e: - print( - f"Connection Error: Could not connect to the server at {url}. Make sure your local server is running." - ) - print(e) - raise e - except requests.exceptions.HTTPError as e: - print(f"HTTP Error: {e}") - if response is not None: - print(f"Response Body: {response.text}") - raise e - except requests.exceptions.JSONDecodeError as e: - # Must come before RequestException as JSONDecodeError is - # a subclass. - print(f"JSON Decode Error: {e}", file=sys.stderr) - if response is not None: - print(f"Response Body: {response.text}", file=sys.stderr) - raise e - except requests.exceptions.RequestException as e: - print(f"An unexpected error occurred: {e}") - raise e - return incremental_diffs - - -def compute_diffs(texts: list[str]) -> list: - return differ.inline_json_diff_sequence(texts) +import pywikidiff2 class DiffToOperationMap: def __init__(self, diff, tokenizer): @@ -402,14 +366,12 @@ class WikiDiffMatcher: self, texts: list[str] = None, tokenizer: Optional[RegexTokenizer] = None, - url: Optional[str] = "http://127.0.0.1:8000", - server=False ): + differ = pywikidiff2.pywikidiff2(numContextLines=1000000, + moved_paragraph_detection_cutoff=200000) # Pre-compute diffs to reduce traffic overhead. - if server is True: - self.diffs = list(compute_diffs_server(list(texts),url)) - else: - self.diffs = list(compute_diffs(list(texts))) + self.diffs = differ.inline_json_diff_sequence(list(texts)) + self.tokenizer = tokenizer or TOKENIZER class Processor(DiffEngine.Processor): diff --git a/wikidiff2_api.php b/wikidiff2_api.php deleted file mode 100644 index 0ee5add..0000000 --- a/wikidiff2_api.php +++ /dev/null @@ -1,28 +0,0 @@ - $value) { - $result[] = wikidiff2_inline_json_diff($previous, $value, 5000000); - $previous = $value; -} - -echo json_encode($result);