remove server.
This commit is contained in:
parent
14e819e565
commit
cc96bb5f3f
@ -1,6 +1,3 @@
|
|||||||
# start the server
|
|
||||||
import asyncio
|
|
||||||
import subprocess
|
|
||||||
from itertools import chain
|
from itertools import chain
|
||||||
from functools import partial
|
from functools import partial
|
||||||
import re
|
import re
|
||||||
@ -11,22 +8,6 @@ from deltas import Delete, Equal, Insert, wikitext_split
|
|||||||
from mwpersistence import Token
|
from mwpersistence import Token
|
||||||
from wiki_diff_matcher import WikiDiffMatcher
|
from wiki_diff_matcher import WikiDiffMatcher
|
||||||
|
|
||||||
@pytest_asyncio.fixture(scope="module", autouse=False)
async def start_stop_server():
    """Run the PHP wikidiff2 API server for the duration of a test module.

    Launches ``php -S 127.0.0.1:8000 wikidiff2_api.php -c php.ini`` with its
    stdout and stderr captured, yields the process handle to the tests, and
    on teardown stops the process and prints everything it wrote.

    Yields:
        The ``asyncio`` subprocess handle of the running PHP server.
    """
    print("starting server")
    proc = await asyncio.create_subprocess_exec(
        "php", "-S", "127.0.0.1:8000",
        "wikidiff2_api.php", "-c", "php.ini",
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # php needs a moment to actually start
    await asyncio.sleep(0.1)
    yield proc

    print("stopping server")
    # The server may already have died (e.g. the port was taken or php is
    # misconfigured).  Signalling a finished process raises
    # ProcessLookupError, which would hide the captured output below --
    # exactly the output needed to diagnose why it died.
    if proc.returncode is None:
        try:
            proc.terminate()
        except ProcessLookupError:
            # Exited between the returncode check and the signal.
            pass
    stdout, stderr = await proc.communicate()
    print(stdout.decode())
    print(stderr.decode())
|
|
||||||
|
|
||||||
def _replace_whitespace(match):
|
def _replace_whitespace(match):
|
||||||
if match.group(1): # If spaces matched (e.g., ' ')
|
if match.group(1): # If spaces matched (e.g., ' ')
|
||||||
return ' '
|
return ' '
|
||||||
@ -371,7 +352,6 @@ def test_actually_equal():
|
|||||||
@pytest.mark.skip
|
@pytest.mark.skip
|
||||||
def test_diff_consistency():
|
def test_diff_consistency():
|
||||||
from mwxml import Dump
|
from mwxml import Dump
|
||||||
#stream = subprocess.Popen(["7za", "x", "-so", "test/dumps/ikwiki-20180301-pages-meta-history.xml.bz2", "*.xml"], stdout=subprocess.PIPE).stdout
|
|
||||||
dump = Dump.from_file("test/dumps/ikwiki.xml")
|
dump = Dump.from_file("test/dumps/ikwiki.xml")
|
||||||
for page in dump:
|
for page in dump:
|
||||||
revisions = [rev.text for rev in page if rev.text]
|
revisions = [rev.text for rev in page if rev.text]
|
||||||
@ -386,7 +366,7 @@ def test_diff_consistency():
|
|||||||
assert_equal_enough(b, rev)
|
assert_equal_enough(b, rev)
|
||||||
last_rev = rev
|
last_rev = rev
|
||||||
|
|
||||||
#@pytest.mark.skip
|
@pytest.mark.skip
|
||||||
def test_benchmark_diff(benchmark):
|
def test_benchmark_diff(benchmark):
|
||||||
from mwxml import Dump
|
from mwxml import Dump
|
||||||
dump = Dump.from_file("test/dumps/ikwiki.xml")
|
dump = Dump.from_file("test/dumps/ikwiki.xml")
|
||||||
@ -394,32 +374,5 @@ def test_benchmark_diff(benchmark):
|
|||||||
def next_revs():
|
def next_revs():
|
||||||
return [next(revs), next(revs)], {}
|
return [next(revs), next(revs)], {}
|
||||||
|
|
||||||
benchmark.pedantic(WikiDiffMatcher,setup=next_revs,iterations=1,rounds=1000, warmup_rounds=1)
|
benchmark.pedantic(WikiDiffMatcher, setup=next_revs, iterations=1,rounds=1000, warmup_rounds=1)
|
||||||
|
|
||||||
def test_benchmark_diff_server(start_stop_server, benchmark):
    """Benchmark building a ``WikiDiffMatcher`` through the PHP diff server.

    Each round consumes the next two revision texts from the ikwiki dump and
    constructs a matcher for that pair with ``server=True``.  The
    ``start_stop_server`` fixture keeps the PHP server alive meanwhile.
    """
    from mwxml import Dump

    dump = Dump.from_file("test/dumps/ikwiki.xml")
    revs = chain.from_iterable([rev.text for rev in page] for page in dump)

    def next_revs():
        # pedantic() invokes the target as target(*args, **kwargs).  The two
        # texts must therefore be wrapped in ONE list so they arrive as the
        # single ``texts`` argument; returning them as two positional
        # arguments would bind the second text to ``tokenizer`` and make the
        # matcher iterate over the characters of the first text.
        return [[next(revs), next(revs)]], {'server': True}

    benchmark.pedantic(
        WikiDiffMatcher,
        setup=next_revs,
        iterations=1,
        rounds=1000,
        warmup_rounds=1,
    )
|
|
||||||
|
|
||||||
@pytest.mark.skip
def test_diff_consistency_server():
    """Check that server-computed diffs reproduce both sides of every revision.

    For each page in the ikwiki dump, every non-empty revision text is fed
    through a ``WikiDiffMatcher`` processor created with ``server=True``.
    The "from" side returned by the processor must match the previous
    revision and the "to" side must match the current one, as judged by
    ``assert_equal_enough``.
    """
    from mwxml import Dump

    # Alternative input: stream the compressed full-history dump instead, e.g.
    # 7za x -so test/dumps/ikwiki-20180301-pages-meta-history.xml.bz2 *.xml
    dump = Dump.from_file("test/dumps/ikwiki.xml")
    for page in dump:
        revisions = [rev.text for rev in page if rev.text]
        matcher = WikiDiffMatcher(revisions, server=True)
        diff_processor = matcher.processor()
        last_rev = ""
        for rev in revisions:
            # Dump the pair under test so a failing revision can be inspected
            # afterwards.  Context managers close the handles right away
            # (the files are rewritten once per revision), and an explicit
            # UTF-8 encoding keeps non-ASCII wikitext from failing on
            # platforms whose default encoding cannot represent it.
            with open("test_unicode_highlight_to", 'w', encoding="utf-8") as to_file:
                print(rev, file=to_file)
            with open("test_unicode_highlight_from", 'w', encoding="utf-8") as from_file:
                print(last_rev, file=from_file)

            ops, a, b = diff_processor.process(rev)
            assert_equal_enough(a, last_rev)
            assert_equal_enough(b, rev)
            last_rev = rev
|
|
||||||
|
|
||||||
|
@ -4,48 +4,12 @@ from collections import namedtuple
|
|||||||
from itertools import chain
|
from itertools import chain
|
||||||
from typing import Dict, Generator, List, Optional, Tuple
|
from typing import Dict, Generator, List, Optional, Tuple
|
||||||
|
|
||||||
import requests
|
|
||||||
from deltas import (Delete, DiffEngine, Equal, Insert, Operation,
|
from deltas import (Delete, DiffEngine, Equal, Insert, Operation,
|
||||||
RegexTokenizer, Token, tokenizers)
|
RegexTokenizer, Token, tokenizers)
|
||||||
from sortedcontainers import SortedDict
|
from sortedcontainers import SortedDict
|
||||||
|
|
||||||
TOKENIZER = tokenizers.wikitext_split
|
TOKENIZER = tokenizers.wikitext_split
|
||||||
import pywikidiff2
|
import pywikidiff2
|
||||||
differ = pywikidiff2.pywikidiff2(numContextLines=1000000,
|
|
||||||
moved_paragraph_detection_cutoff=200000)
|
|
||||||
|
|
||||||
def compute_diffs_server(texts, url="http://127.0.0.1:8000"):
    """POST ``texts`` as JSON to the diff server and return its decoded reply.

    Args:
        texts: JSON-serialisable payload sent as the request body.
        url: Address of the diff server.

    Returns:
        The decoded JSON body of the server's response.

    Raises:
        requests.exceptions.ConnectionError: The server could not be reached.
        requests.exceptions.HTTPError: The server answered with an error
            status.
        requests.exceptions.JSONDecodeError: The response body was not valid
            JSON.
        requests.exceptions.RequestException: Any other request failure.

    Every failure is reported on stderr (with the response body when one is
    available) and then re-raised unchanged.
    """
    # Imported locally so the stderr reporting below works regardless of
    # what the module imports at the top.
    import sys

    response = None
    try:
        response = requests.post(url, json=texts)
        response.raise_for_status()
        incremental_diffs = response.json()
    except requests.exceptions.ConnectionError as e:
        print(
            f"Connection Error: Could not connect to the server at {url}. Make sure your local server is running.",
            file=sys.stderr,
        )
        print(e, file=sys.stderr)
        raise
    except requests.exceptions.HTTPError as e:
        print(f"HTTP Error: {e}", file=sys.stderr)
        if response is not None:
            print(f"Response Body: {response.text}", file=sys.stderr)
        raise
    except requests.exceptions.JSONDecodeError as e:
        # Must come before RequestException as JSONDecodeError is
        # a subclass.
        print(f"JSON Decode Error: {e}", file=sys.stderr)
        if response is not None:
            print(f"Response Body: {response.text}", file=sys.stderr)
        raise
    except requests.exceptions.RequestException as e:
        print(f"An unexpected error occurred: {e}", file=sys.stderr)
        raise
    return incremental_diffs
|
|
||||||
|
|
||||||
|
|
||||||
def compute_diffs(texts: list[str]) -> list:
    """Diff ``texts`` in-process with the module-level ``differ``.

    Returns whatever ``differ.inline_json_diff_sequence`` produces for the
    given sequence of texts.
    """
    diff_sequence = differ.inline_json_diff_sequence(texts)
    return diff_sequence
|
|
||||||
|
|
||||||
class DiffToOperationMap:
|
class DiffToOperationMap:
|
||||||
def __init__(self, diff, tokenizer):
|
def __init__(self, diff, tokenizer):
|
||||||
@ -402,14 +366,12 @@ class WikiDiffMatcher:
|
|||||||
self,
|
self,
|
||||||
texts: list[str] = None,
|
texts: list[str] = None,
|
||||||
tokenizer: Optional[RegexTokenizer] = None,
|
tokenizer: Optional[RegexTokenizer] = None,
|
||||||
url: Optional[str] = "http://127.0.0.1:8000",
|
|
||||||
server=False
|
|
||||||
):
|
):
|
||||||
|
differ = pywikidiff2.pywikidiff2(numContextLines=1000000,
|
||||||
|
moved_paragraph_detection_cutoff=200000)
|
||||||
# Pre-compute diffs to reduce traffic overhead.
|
# Pre-compute diffs to reduce traffic overhead.
|
||||||
if server is True:
|
self.diffs = differ.inline_json_diff_sequence(list(texts))
|
||||||
self.diffs = list(compute_diffs_server(list(texts),url))
|
|
||||||
else:
|
|
||||||
self.diffs = list(compute_diffs(list(texts)))
|
|
||||||
self.tokenizer = tokenizer or TOKENIZER
|
self.tokenizer = tokenizer or TOKENIZER
|
||||||
|
|
||||||
class Processor(DiffEngine.Processor):
|
class Processor(DiffEngine.Processor):
|
||||||
|
@ -1,28 +0,0 @@
|
|||||||
<?php

// Minimal HTTP endpoint around wikidiff2: receives a JSON list of texts and
// answers with a JSON list of inline JSON diffs, one per text, each computed
// against the text before it (the first one against the empty string).

header("Cache-Control: no-store, no-cache, must-revalidate, max-age=0");
header("Cache-Control: post-check=0, pre-check=0", false);
header("Pragma: no-cache");
header("Content-Type: application/json");

// Launch this server with:
// php -S localhost:8000 -q -c php.ini

// Call the server with:
// curl -X POST -H "Content-Type: application/json" \
//      -d '{"arg1": "aa", "arg2": "aba"}' \
//      http://localhost:8000

// Get the raw POST data
$rawData = file_get_contents('php://input');

// Decode the JSON data
$data = json_decode($rawData, true);

// json_decode() yields null for malformed input and a scalar for a bare JSON
// value; neither can be iterated.  Answer with an explicit client error
// rather than letting foreach emit a PHP warning into the response body.
if (!is_array($data)) {
    http_response_code(400);
    echo json_encode(['error' => 'Request body must be a JSON array or object of texts.']);
    exit;
}

$previous = '';
$result = [];
foreach ($data as $value) {
    // NOTE(review): the third argument is presumably the number of context
    // lines, set high enough to cover the whole text -- confirm against the
    // wikidiff2 documentation.
    $result[] = wikidiff2_inline_json_diff($previous, $value, 5000000);
    $previous = $value;
}

echo json_encode($result);
|
|
Loading…
Reference in New Issue
Block a user