From c7eb374ceb1d1abc364bd597c8dc8b8f3edb580d Mon Sep 17 00:00:00 2001
From: Nathan TeBlunthuis
Date: Wed, 7 Jan 2026 12:42:37 -0800
Subject: [PATCH] use signalling to timeout mwparserfromhell instead of asyncio.

---
 src/wikiq/wikitext_parser.py | 33 ++++++++++++++++++++-------------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/src/wikiq/wikitext_parser.py b/src/wikiq/wikitext_parser.py
index 8acc7be..9c68cec 100644
--- a/src/wikiq/wikitext_parser.py
+++ b/src/wikiq/wikitext_parser.py
@@ -1,7 +1,8 @@
 """Shared wikitext parser with caching to avoid duplicate parsing."""
 from __future__ import annotations
 
-import asyncio
+import signal
+
 import mwparserfromhell
 
 PARSER_TIMEOUT = 60  # seconds
@@ -22,22 +23,28 @@ class WikitextParser:
         self._cached_wikicode = None
         self.last_parse_timed_out: bool = False
 
-    async def _parse_async(self, text: str):
-        """Parse wikitext with timeout protection."""
-        try:
-            result = await asyncio.wait_for(
-                asyncio.to_thread(mwparserfromhell.parse, text),
-                timeout=PARSER_TIMEOUT
-            )
-            return result, False
-        except TimeoutError:
-            return None, True
+    def _timeout_handler(self, signum, frame):
+        raise TimeoutError("mwparserfromhell parse exceeded timeout")
 
     def _get_wikicode(self, text: str):
         """Parse text and cache result. Returns cached result if text unchanged."""
-        if text != self._cached_text:
+        if text == self._cached_text:
+            return self._cached_wikicode
+
+        old_handler = signal.signal(signal.SIGALRM, self._timeout_handler)
+        signal.alarm(PARSER_TIMEOUT)
+        try:
+            self._cached_wikicode = mwparserfromhell.parse(text)
             self._cached_text = text
-            self._cached_wikicode, self.last_parse_timed_out = asyncio.run(self._parse_async(text))
+            self.last_parse_timed_out = False
+        except TimeoutError:
+            self._cached_wikicode = None
+            self._cached_text = text
+            self.last_parse_timed_out = True
+        finally:
+            signal.alarm(0)
+            signal.signal(signal.SIGALRM, old_handler)
+
         return self._cached_wikicode
 
     def extract_external_links(self, text: str | None) -> list[str] | None: