Use a SIGALRM-based signal timeout for mwparserfromhell parsing instead of asyncio.
This commit is contained in:
@@ -1,7 +1,8 @@
|
|||||||
"""Shared wikitext parser with caching to avoid duplicate parsing."""
|
"""Shared wikitext parser with caching to avoid duplicate parsing."""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import asyncio
|
import signal
|
||||||
|
|
||||||
import mwparserfromhell
|
import mwparserfromhell
|
||||||
|
|
||||||
PARSER_TIMEOUT = 60 # seconds
|
PARSER_TIMEOUT = 60 # seconds
|
||||||
@@ -22,22 +23,28 @@ class WikitextParser:
|
|||||||
self._cached_wikicode = None
|
self._cached_wikicode = None
|
||||||
self.last_parse_timed_out: bool = False
|
self.last_parse_timed_out: bool = False
|
||||||
|
|
||||||
async def _parse_async(self, text: str):
|
def _timeout_handler(self, signum, frame):
|
||||||
"""Parse wikitext with timeout protection."""
|
raise TimeoutError("mwparserfromhell parse exceeded timeout")
|
||||||
try:
|
|
||||||
result = await asyncio.wait_for(
|
|
||||||
asyncio.to_thread(mwparserfromhell.parse, text),
|
|
||||||
timeout=PARSER_TIMEOUT
|
|
||||||
)
|
|
||||||
return result, False
|
|
||||||
except TimeoutError:
|
|
||||||
return None, True
|
|
||||||
|
|
||||||
def _get_wikicode(self, text: str):
|
def _get_wikicode(self, text: str):
|
||||||
"""Parse text and cache result. Returns cached result if text unchanged."""
|
"""Parse text and cache result. Returns cached result if text unchanged."""
|
||||||
if text != self._cached_text:
|
if text == self._cached_text:
|
||||||
|
return self._cached_wikicode
|
||||||
|
|
||||||
|
old_handler = signal.signal(signal.SIGALRM, self._timeout_handler)
|
||||||
|
signal.alarm(PARSER_TIMEOUT)
|
||||||
|
try:
|
||||||
|
self._cached_wikicode = mwparserfromhell.parse(text)
|
||||||
self._cached_text = text
|
self._cached_text = text
|
||||||
self._cached_wikicode, self.last_parse_timed_out = asyncio.run(self._parse_async(text))
|
self.last_parse_timed_out = False
|
||||||
|
except TimeoutError:
|
||||||
|
self._cached_wikicode = None
|
||||||
|
self._cached_text = text
|
||||||
|
self.last_parse_timed_out = True
|
||||||
|
finally:
|
||||||
|
signal.alarm(0)
|
||||||
|
signal.signal(signal.SIGALRM, old_handler)
|
||||||
|
|
||||||
return self._cached_wikicode
|
return self._cached_wikicode
|
||||||
|
|
||||||
def extract_external_links(self, text: str | None) -> list[str] | None:
|
def extract_external_links(self, text: str | None) -> list[str] | None:
|
||||||
|
|||||||
Reference in New Issue
Block a user