Remove the PHP wikidiff2 diff server, its client code, and its tests.

This commit is contained in:
Nathan TeBlunthuis 2025-07-07 11:21:16 -07:00
parent 14e819e565
commit cc96bb5f3f
3 changed files with 7 additions and 120 deletions

View File

@ -1,6 +1,3 @@
# start the server
import asyncio
import subprocess
from itertools import chain
from functools import partial
import re
@ -11,22 +8,6 @@ from deltas import Delete, Equal, Insert, wikitext_split
from mwpersistence import Token
from wiki_diff_matcher import WikiDiffMatcher
@pytest_asyncio.fixture(scope="module", autouse=False)
async def start_stop_server():
    """Run the PHP wikidiff2 server while a test module executes.

    Launches ``php -S 127.0.0.1:8000 wikidiff2_api.php -c php.ini`` as a
    subprocess with stdout and stderr captured, yields the process handle
    to the tests, and on teardown stops the process and prints whatever it
    wrote to both streams.

    Yields:
        The asyncio subprocess handle of the PHP server.
    """
    print("starting server")
    proc = await asyncio.create_subprocess_exec(
        "php", "-S", "127.0.0.1:8000",
        "wikidiff2_api.php", "-c", "php.ini",
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # php needs a moment to actually start
    await asyncio.sleep(0.1)

    yield proc

    print("stopping server")
    # The server may already have exited on its own (for example when the
    # port was busy). Signalling a finished process raises
    # ProcessLookupError, which would abort teardown before the captured
    # output below is printed.
    if proc.returncode is None:
        try:
            proc.terminate()
        except ProcessLookupError:
            # Lost the race: the process exited between the check and the
            # signal. There is nothing left to stop.
            pass
    stdout, stderr = await proc.communicate()
    print(stdout.decode())
    print(stderr.decode())
def _replace_whitespace(match):
if match.group(1): # If spaces matched (e.g., ' ')
return ' '
@ -371,7 +352,6 @@ def test_actually_equal():
@pytest.mark.skip
def test_diff_consistency():
from mwxml import Dump
#stream = subprocess.Popen(["7za", "x", "-so", "test/dumps/ikwiki-20180301-pages-meta-history.xml.bz2", "*.xml"], stdout=subprocess.PIPE).stdout
dump = Dump.from_file("test/dumps/ikwiki.xml")
for page in dump:
revisions = [rev.text for rev in page if rev.text]
@ -386,7 +366,7 @@ def test_diff_consistency():
assert_equal_enough(b, rev)
last_rev = rev
#@pytest.mark.skip
@pytest.mark.skip
def test_benchmark_diff(benchmark):
from mwxml import Dump
dump = Dump.from_file("test/dumps/ikwiki.xml")
@ -394,32 +374,5 @@ def test_benchmark_diff(benchmark):
def next_revs():
return [next(revs), next(revs)], {}
benchmark.pedantic(WikiDiffMatcher,setup=next_revs,iterations=1,rounds=1000, warmup_rounds=1)
def test_benchmark_diff_server(start_stop_server, benchmark):
    """Benchmark ``WikiDiffMatcher`` in server mode on pairs of revision texts.

    Revision texts are streamed lazily from the ikwiki test dump; every
    benchmark round consumes the next two texts from that stream.
    """
    from mwxml import Dump

    pages = Dump.from_file("test/dumps/ikwiki.xml")
    texts = chain.from_iterable([rev.text for rev in page] for page in pages)

    def take_pair():
        # Returned as (args, kwargs) -- presumably the shape that
        # benchmark.pedantic expects from its ``setup`` callable; confirm
        # against the pytest-benchmark documentation.
        first = next(texts)
        second = next(texts)
        return [first, second], {'server': True}

    benchmark.pedantic(
        WikiDiffMatcher,
        setup=take_pair,
        iterations=1,
        rounds=1000,
        warmup_rounds=1,
    )
@pytest.mark.skip
def test_diff_consistency_server():
    """Check that server-mode diffs reproduce both sides of every revision pair.

    For each page in the ikwiki test dump, every non-empty revision text is
    fed through the ``WikiDiffMatcher`` processor (``server=True``). The
    "from" and "to" token sequences it returns must match the previous and
    the current revision text according to ``assert_equal_enough``.
    """
    from mwxml import Dump
    #stream = subprocess.Popen(["7za", "x", "-so", "test/dumps/ikwiki-20180301-pages-meta-history.xml.bz2", "*.xml"], stdout=subprocess.PIPE).stdout
    dump = Dump.from_file("test/dumps/ikwiki.xml")
    for page in dump:
        revisions = [rev.text for rev in page if rev.text]
        matcher = WikiDiffMatcher(revisions, server=True)
        diff_processor = matcher.processor()
        last_rev = ""
        for rev in revisions:
            # Write the pair under test to disk (presumably so a failing
            # pair can be inspected afterwards). The files are closed
            # explicitly and written as UTF-8 so the dump does not depend
            # on garbage collection or on the locale's default encoding.
            with open("test_unicode_highlight_to", 'w',
                      encoding="utf-8") as to_file:
                print(rev, file=to_file)
            with open("test_unicode_highlight_from", 'w',
                      encoding="utf-8") as from_file:
                print(last_rev, file=from_file)
            ops, a, b = diff_processor.process(rev)
            assert_equal_enough(a, last_rev)
            assert_equal_enough(b, rev)
            last_rev = rev
benchmark.pedantic(WikiDiffMatcher, setup=next_revs, iterations=1,rounds=1000, warmup_rounds=1)

View File

@ -4,48 +4,12 @@ from collections import namedtuple
from itertools import chain
from typing import Dict, Generator, List, Optional, Tuple
import requests
from deltas import (Delete, DiffEngine, Equal, Insert, Operation,
RegexTokenizer, Token, tokenizers)
from sortedcontainers import SortedDict
# Fallback tokenizer, used by WikiDiffMatcher when the caller supplies none.
TOKENIZER = tokenizers.wikitext_split
import pywikidiff2
# Module-level pywikidiff2 instance that compute_diffs() delegates to.
# NOTE(review): the very large numContextLines and
# moved_paragraph_detection_cutoff values presumably make wikidiff2 emit the
# full text as context and keep moved-paragraph detection on for large
# revisions -- confirm against the pywikidiff2 documentation.
differ = pywikidiff2.pywikidiff2(numContextLines=1000000,
moved_paragraph_detection_cutoff=200000)
def compute_diffs_server(texts, url="http://127.0.0.1:8000", timeout=None):
    """POST *texts* to the diff server and return its decoded JSON reply.

    Args:
        texts: JSON-serialisable payload sent as the request body.
        url: Address of the diff server.
        timeout: Optional timeout forwarded to ``requests.post``. The
            default ``None`` waits indefinitely, which matches the
            behaviour before this parameter existed.

    Returns:
        The decoded JSON body of the server's response.

    Raises:
        requests.exceptions.ConnectionError: The server is unreachable.
        requests.exceptions.HTTPError: The server answered with an error
            status.
        requests.exceptions.JSONDecodeError: The response body is not
            valid JSON.
        requests.exceptions.RequestException: Any other request failure.

    Every failure is reported on stderr and then re-raised unchanged.
    """
    response = None
    try:
        response = requests.post(url, json=texts, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.ConnectionError as e:
        print(
            f"Connection Error: Could not connect to the server at {url}. Make sure your local server is running.",
            file=sys.stderr,
        )
        print(e, file=sys.stderr)
        raise
    except requests.exceptions.HTTPError as e:
        print(f"HTTP Error: {e}", file=sys.stderr)
        if response is not None:
            print(f"Response Body: {response.text}", file=sys.stderr)
        raise
    except requests.exceptions.JSONDecodeError as e:
        # Must come before RequestException as JSONDecodeError is
        # a subclass.
        print(f"JSON Decode Error: {e}", file=sys.stderr)
        if response is not None:
            print(f"Response Body: {response.text}", file=sys.stderr)
        raise
    except requests.exceptions.RequestException as e:
        print(f"An unexpected error occurred: {e}", file=sys.stderr)
        raise
def compute_diffs(texts: list[str]) -> list:
    """Return the diff sequence for *texts* from the module-level ``differ``.

    Thin wrapper: the result of ``differ.inline_json_diff_sequence`` is
    passed back unchanged.
    """
    diff_sequence = differ.inline_json_diff_sequence(texts)
    return diff_sequence
class DiffToOperationMap:
def __init__(self, diff, tokenizer):
@ -402,14 +366,12 @@ class WikiDiffMatcher:
self,
texts: list[str] = None,
tokenizer: Optional[RegexTokenizer] = None,
url: Optional[str] = "http://127.0.0.1:8000",
server=False
):
differ = pywikidiff2.pywikidiff2(numContextLines=1000000,
moved_paragraph_detection_cutoff=200000)
# Pre-compute diffs to reduce traffic overhead.
if server is True:
self.diffs = list(compute_diffs_server(list(texts),url))
else:
self.diffs = list(compute_diffs(list(texts)))
self.diffs = differ.inline_json_diff_sequence(list(texts))
self.tokenizer = tokenizer or TOKENIZER
class Processor(DiffEngine.Processor):

View File

@ -1,28 +0,0 @@
<?php
/**
 * Minimal HTTP endpoint exposing wikidiff2's inline JSON diff.
 *
 * Reads a JSON array (or object) of texts from the request body and
 * responds with a JSON array holding one diff per text: each text is
 * diffed against the one before it, and the first against the empty string.
 * A body that does not decode to an array is rejected with HTTP 400.
 */
header("Cache-Control: no-store, no-cache, must-revalidate, max-age=0");
header("Cache-Control: post-check=0, pre-check=0", false);
header("Pragma: no-cache");
header("Content-Type: application/json");

// Launch this server with:
// php -S localhost:8000 -q -c php.ini
// Call the server with:
// curl -X POST -H "Content-Type: application/json" \
// -d '{"arg1": "aa", "arg2": "aba"}' \
// http://localhost:8000

// Get the raw POST data
$rawData = file_get_contents('php://input');

// Decode the JSON data
$data = json_decode($rawData, true);

// json_decode() returns null for malformed input and a scalar for a bare
// JSON value; neither can be iterated, so reject them explicitly instead
// of letting foreach emit a warning and answering with an empty list.
if (!is_array($data)) {
    http_response_code(400);
    echo json_encode(['error' => 'Request body must be a JSON array or object of texts.']);
    exit;
}

$previous = '';
$result = [];
foreach ($data as $value) {
    $result[] = wikidiff2_inline_json_diff($previous, $value, 5000000);
    $previous = $value;
}

echo json_encode($result);