Remove the PHP wikidiff2 server, its client code path, and the server-based tests.
This commit is contained in:
		
							parent
							
								
									14e819e565
								
							
						
					
					
						commit
						cc96bb5f3f
					
				| @ -1,6 +1,3 @@ | |||||||
| # start the server |  | ||||||
| import asyncio |  | ||||||
| import subprocess |  | ||||||
| from itertools import chain | from itertools import chain | ||||||
| from functools import partial | from functools import partial | ||||||
| import re | import re | ||||||
| @ -11,22 +8,6 @@ from deltas import Delete, Equal, Insert, wikitext_split | |||||||
| from mwpersistence import Token | from mwpersistence import Token | ||||||
| from wiki_diff_matcher import WikiDiffMatcher | from wiki_diff_matcher import WikiDiffMatcher | ||||||
| 
 | 
 | ||||||
| @pytest_asyncio.fixture(scope="module", autouse=False) |  | ||||||
| async def start_stop_server(): |  | ||||||
|     print("starting server") |  | ||||||
|     proc = await asyncio.create_subprocess_exec("php", "-S", "127.0.0.1:8000", |  | ||||||
|                                                 "wikidiff2_api.php", "-c", "php.ini", |  | ||||||
|                                                 stdout=subprocess.PIPE, |  | ||||||
|                                                 stderr=subprocess.PIPE) |  | ||||||
|     # php needs a moment to actually start |  | ||||||
|     await asyncio.sleep(0.1) |  | ||||||
|     yield proc |  | ||||||
|     print("stopping server") |  | ||||||
|     proc.terminate() |  | ||||||
|     stdout, stderr = await proc.communicate() |  | ||||||
|     print(stdout.decode()) |  | ||||||
|     print(stderr.decode()) |  | ||||||
| 
 |  | ||||||
| def _replace_whitespace(match): | def _replace_whitespace(match): | ||||||
|     if match.group(1):  # If spaces matched (e.g., '  ') |     if match.group(1):  # If spaces matched (e.g., '  ') | ||||||
|         return ' ' |         return ' ' | ||||||
| @ -371,7 +352,6 @@ def test_actually_equal(): | |||||||
| @pytest.mark.skip | @pytest.mark.skip | ||||||
| def test_diff_consistency(): | def test_diff_consistency(): | ||||||
|     from mwxml import Dump |     from mwxml import Dump | ||||||
|     #stream = subprocess.Popen(["7za", "x", "-so", "test/dumps/ikwiki-20180301-pages-meta-history.xml.bz2", "*.xml"], stdout=subprocess.PIPE).stdout |  | ||||||
|     dump = Dump.from_file("test/dumps/ikwiki.xml") |     dump = Dump.from_file("test/dumps/ikwiki.xml") | ||||||
|     for page in dump: |     for page in dump: | ||||||
|         revisions = [rev.text for rev in page if rev.text] |         revisions = [rev.text for rev in page if rev.text] | ||||||
| @ -386,7 +366,7 @@ def test_diff_consistency(): | |||||||
|             assert_equal_enough(b, rev) |             assert_equal_enough(b, rev) | ||||||
|             last_rev = rev |             last_rev = rev | ||||||
| 
 | 
 | ||||||
| #@pytest.mark.skip | @pytest.mark.skip | ||||||
| def test_benchmark_diff(benchmark): | def test_benchmark_diff(benchmark): | ||||||
|     from mwxml import Dump |     from mwxml import Dump | ||||||
|     dump = Dump.from_file("test/dumps/ikwiki.xml") |     dump = Dump.from_file("test/dumps/ikwiki.xml") | ||||||
| @ -394,32 +374,5 @@ def test_benchmark_diff(benchmark): | |||||||
|     def next_revs(): |     def next_revs(): | ||||||
|         return [next(revs), next(revs)], {} |         return [next(revs), next(revs)], {} | ||||||
| 
 | 
 | ||||||
|     benchmark.pedantic(WikiDiffMatcher,setup=next_revs,iterations=1,rounds=1000, warmup_rounds=1) |     benchmark.pedantic(WikiDiffMatcher, setup=next_revs, iterations=1,rounds=1000, warmup_rounds=1) | ||||||
| 
 |  | ||||||
| def test_benchmark_diff_server(start_stop_server,benchmark): |  | ||||||
|     from mwxml import Dump |  | ||||||
|     dump = Dump.from_file("test/dumps/ikwiki.xml") |  | ||||||
|     revs = chain.from_iterable([rev.text for rev in page] for page in dump) |  | ||||||
|     def next_revs(): |  | ||||||
|         return [next(revs), next(revs)], {'server':True} |  | ||||||
| 
 |  | ||||||
|     benchmark.pedantic(WikiDiffMatcher,setup=next_revs,iterations=1,rounds=1000, warmup_rounds=1) |  | ||||||
| 
 |  | ||||||
| @pytest.mark.skip |  | ||||||
| def test_diff_consistency_server(): |  | ||||||
|     from mwxml import Dump |  | ||||||
|     #stream = subprocess.Popen(["7za", "x", "-so", "test/dumps/ikwiki-20180301-pages-meta-history.xml.bz2", "*.xml"], stdout=subprocess.PIPE).stdout |  | ||||||
|     dump = Dump.from_file("test/dumps/ikwiki.xml") |  | ||||||
|     for page in dump: |  | ||||||
|         revisions = [rev.text for rev in page if rev.text] |  | ||||||
|         matcher = WikiDiffMatcher(revisions,server=True) |  | ||||||
|         diff_processor = matcher.processor() |  | ||||||
|         last_rev = "" |  | ||||||
|         for rev in revisions: |  | ||||||
|             print(rev, file=open("test_unicode_highlight_to",'w')) |  | ||||||
|             print(last_rev, file=open("test_unicode_highlight_from",'w')) |  | ||||||
|             ops, a, b = diff_processor.process(rev) |  | ||||||
|             assert_equal_enough(a, last_rev) |  | ||||||
|             assert_equal_enough(b, rev) |  | ||||||
|             last_rev = rev |  | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -4,48 +4,12 @@ from collections import namedtuple | |||||||
| from itertools import chain | from itertools import chain | ||||||
| from typing import Dict, Generator, List, Optional, Tuple | from typing import Dict, Generator, List, Optional, Tuple | ||||||
| 
 | 
 | ||||||
| import requests |  | ||||||
| from deltas import (Delete, DiffEngine, Equal, Insert, Operation, | from deltas import (Delete, DiffEngine, Equal, Insert, Operation, | ||||||
|                     RegexTokenizer, Token, tokenizers) |                     RegexTokenizer, Token, tokenizers) | ||||||
| from sortedcontainers import SortedDict | from sortedcontainers import SortedDict | ||||||
| 
 | 
 | ||||||
| TOKENIZER = tokenizers.wikitext_split | TOKENIZER = tokenizers.wikitext_split | ||||||
| import pywikidiff2  | import pywikidiff2 | ||||||
| differ = pywikidiff2.pywikidiff2(numContextLines=1000000, |  | ||||||
|                                  moved_paragraph_detection_cutoff=200000) |  | ||||||
| 
 |  | ||||||
| def compute_diffs_server(texts, url="http://127.0.0.1:8000"): |  | ||||||
|     response = None |  | ||||||
|     try: |  | ||||||
|         response = requests.post(url, json=texts) |  | ||||||
|         response.raise_for_status() |  | ||||||
|         incremental_diffs = response.json() |  | ||||||
|     except requests.exceptions.ConnectionError as e: |  | ||||||
|         print( |  | ||||||
|             f"Connection Error: Could not connect to the server at {url}. Make sure your local server is running." |  | ||||||
|         ) |  | ||||||
|         print(e) |  | ||||||
|         raise e |  | ||||||
|     except requests.exceptions.HTTPError as e: |  | ||||||
|         print(f"HTTP Error: {e}") |  | ||||||
|         if response is not None: |  | ||||||
|             print(f"Response Body: {response.text}") |  | ||||||
|         raise e |  | ||||||
|     except requests.exceptions.JSONDecodeError as e: |  | ||||||
|         # Must come before RequestException as JSONDecodeError is |  | ||||||
|         # a subclass. |  | ||||||
|         print(f"JSON Decode Error: {e}", file=sys.stderr) |  | ||||||
|         if response is not None: |  | ||||||
|             print(f"Response Body: {response.text}", file=sys.stderr) |  | ||||||
|         raise e |  | ||||||
|     except requests.exceptions.RequestException as e: |  | ||||||
|         print(f"An unexpected error occurred: {e}") |  | ||||||
|         raise e |  | ||||||
|     return incremental_diffs |  | ||||||
|          |  | ||||||
| 
 |  | ||||||
| def compute_diffs(texts: list[str]) -> list: |  | ||||||
|     return differ.inline_json_diff_sequence(texts) |  | ||||||
| 
 | 
 | ||||||
| class DiffToOperationMap: | class DiffToOperationMap: | ||||||
|     def __init__(self, diff, tokenizer): |     def __init__(self, diff, tokenizer): | ||||||
| @ -402,14 +366,12 @@ class WikiDiffMatcher: | |||||||
|         self, |         self, | ||||||
|         texts: list[str] = None, |         texts: list[str] = None, | ||||||
|         tokenizer: Optional[RegexTokenizer] = None, |         tokenizer: Optional[RegexTokenizer] = None, | ||||||
|         url: Optional[str] = "http://127.0.0.1:8000", |  | ||||||
|         server=False |  | ||||||
|     ): |     ): | ||||||
|  |         differ = pywikidiff2.pywikidiff2(numContextLines=1000000, | ||||||
|  |                                          moved_paragraph_detection_cutoff=200000) | ||||||
|         # Pre-compute diffs to reduce traffic overhead. |         # Pre-compute diffs to reduce traffic overhead. | ||||||
|         if server is True: |         self.diffs = differ.inline_json_diff_sequence(list(texts)) | ||||||
|             self.diffs = list(compute_diffs_server(list(texts),url)) | 
 | ||||||
|         else: |  | ||||||
|             self.diffs = list(compute_diffs(list(texts))) |  | ||||||
|         self.tokenizer = tokenizer or TOKENIZER |         self.tokenizer = tokenizer or TOKENIZER | ||||||
| 
 | 
 | ||||||
|     class Processor(DiffEngine.Processor): |     class Processor(DiffEngine.Processor): | ||||||
|  | |||||||
| @ -1,28 +0,0 @@ | |||||||
| <?php |  | ||||||
| 
 |  | ||||||
| header("Cache-Control: no-store, no-cache, must-revalidate, max-age=0"); |  | ||||||
| header("Cache-Control: post-check=0, pre-check=0", false); |  | ||||||
| header("Pragma: no-cache"); |  | ||||||
| 
 |  | ||||||
| // Launch this server with:
 |  | ||||||
| // php -S localhost:8000 -q -c php.ini
 |  | ||||||
| 
 |  | ||||||
| // Call the server with:
 |  | ||||||
| // curl -X POST -H "Content-Type: application/json" \
 |  | ||||||
| //   -d '{"arg1": "aa", "arg2": "aba"}' \
 |  | ||||||
| //   http://localhost:8000
 |  | ||||||
| 
 |  | ||||||
| // Get the raw POST data
 |  | ||||||
| $rawData = file_get_contents('php://input'); |  | ||||||
| 
 |  | ||||||
| // Decode the JSON data
 |  | ||||||
| $data = json_decode($rawData, true); |  | ||||||
| 
 |  | ||||||
| $previous = ''; |  | ||||||
| $result = []; |  | ||||||
| foreach ($data as $i => $value) { |  | ||||||
|     $result[] = wikidiff2_inline_json_diff($previous, $value, 5000000); |  | ||||||
|     $previous = $value; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| echo json_encode($result); |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user