Use signal-based alarms to time out mwparserfromhell instead of asyncio.
This commit is contained in:
@@ -1,7 +1,8 @@
|
||||
"""Shared wikitext parser with caching to avoid duplicate parsing."""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import signal
|
||||
|
||||
import mwparserfromhell
|
||||
|
||||
# Upper bound, in seconds, on a single mwparserfromhell parse before it is abandoned.
PARSER_TIMEOUT = 60  # seconds
|
||||
@@ -22,22 +23,28 @@ class WikitextParser:
|
||||
self._cached_wikicode = None
|
||||
self.last_parse_timed_out: bool = False
|
||||
|
||||
async def _parse_async(self, text: str):
    """Parse wikitext in a worker thread, waiting at most PARSER_TIMEOUT seconds.

    Args:
        text: Raw wikitext handed to ``mwparserfromhell.parse``.

    Returns:
        A ``(wikicode, timed_out)`` tuple: ``(result, False)`` when the parse
        finishes in time, ``(None, True)`` when the wait times out.
    """
    try:
        result = await asyncio.wait_for(
            asyncio.to_thread(mwparserfromhell.parse, text),
            timeout=PARSER_TIMEOUT,
        )
    except (asyncio.TimeoutError, TimeoutError):
        # Before Python 3.11 ``asyncio.wait_for`` raises asyncio.TimeoutError,
        # which is a different class from the builtin TimeoutError; catching
        # both keeps the timeout path working on every supported version.
        # NOTE: only the wait is abandoned here -- the worker thread running
        # the parse cannot be interrupted and keeps going in the background.
        return None, True
    return result, False
|
||||
def _timeout_handler(self, signum, frame):
    """SIGALRM handler that aborts the in-progress parse.

    Args:
        signum: Signal number supplied by the signal machinery (unused).
        frame: Interrupted stack frame (unused).

    Raises:
        TimeoutError: Always, so the interrupted parse call unwinds.
    """
    raise TimeoutError("mwparserfromhell parse exceeded timeout")

def _get_wikicode(self, text: str):
    """Parse text and cache result. Returns cached result if text unchanged.

    The parse is guarded by a SIGALRM alarm of PARSER_TIMEOUT seconds when
    that is possible (SIGALRM exists and we are on the main thread);
    otherwise the parse runs without a timeout instead of failing.

    Args:
        text: Wikitext to parse.

    Returns:
        The parsed wikicode, or ``None`` when the parse timed out. The
        outcome is also recorded in ``self.last_parse_timed_out``.
    """
    import threading  # local import: only needed for the main-thread check

    if text == self._cached_text:
        return self._cached_wikicode

    # signal.signal() raises ValueError outside the main thread, and SIGALRM
    # does not exist on Windows; in either case skip the alarm entirely.
    use_alarm = (
        hasattr(signal, "SIGALRM")
        and threading.current_thread() is threading.main_thread()
    )

    previous_handler = None
    if use_alarm:
        previous_handler = signal.signal(signal.SIGALRM, self._timeout_handler)
        signal.alarm(PARSER_TIMEOUT)
    try:
        wikicode = mwparserfromhell.parse(text)
        timed_out = False
    except TimeoutError:
        wikicode = None
        timed_out = True
    finally:
        if use_alarm:
            # Cancel the pending alarm before putting the old handler back.
            signal.alarm(0)
            # signal.signal() returns None when the previous handler was not
            # installed from Python; it cannot be re-installed, so skip it.
            if previous_handler is not None:
                signal.signal(signal.SIGALRM, previous_handler)

    # A timed-out text is cached too, so the same input is not retried.
    self._cached_wikicode = wikicode
    self._cached_text = text
    self.last_parse_timed_out = timed_out

    return self._cached_wikicode
|
||||
|
||||
def extract_external_links(self, text: str | None) -> list[str] | None:
|
||||
|
||||
Reference in New Issue
Block a user