diff --git a/src/wikiq/wikitext_parser.py b/src/wikiq/wikitext_parser.py
index 8acc7be..9c68cec 100644
--- a/src/wikiq/wikitext_parser.py
+++ b/src/wikiq/wikitext_parser.py
@@ -1,7 +1,9 @@
 """Shared wikitext parser with caching to avoid duplicate parsing."""
 from __future__ import annotations
 
-import asyncio
+import signal
+import threading
+
 import mwparserfromhell
 
 PARSER_TIMEOUT = 60  # seconds
@@ -22,22 +24,43 @@ class WikitextParser:
         self._cached_wikicode = None
         self.last_parse_timed_out: bool = False
 
-    async def _parse_async(self, text: str):
-        """Parse wikitext with timeout protection."""
-        try:
-            result = await asyncio.wait_for(
-                asyncio.to_thread(mwparserfromhell.parse, text),
-                timeout=PARSER_TIMEOUT
-            )
-            return result, False
-        except TimeoutError:
-            return None, True
+    def _timeout_handler(self, signum, frame):
+        """SIGALRM handler: abort the in-progress parse by raising TimeoutError."""
+        raise TimeoutError("mwparserfromhell parse exceeded timeout")
 
     def _get_wikicode(self, text: str):
-        """Parse text and cache result. Returns cached result if text unchanged."""
-        if text != self._cached_text:
+        """Parse text and cache result. Returns cached result if text unchanged.
+
+        Returns None and sets ``last_parse_timed_out`` when parsing exceeds
+        PARSER_TIMEOUT seconds. The timeout relies on SIGALRM, so it is only
+        armed when that signal exists and the call runs on the main thread;
+        otherwise the text is parsed without a time limit.
+        """
+        if text == self._cached_text:
+            return self._cached_wikicode
+
+        # signal.signal() raises ValueError off the main thread, and SIGALRM
+        # is missing on Windows, so fall back to an untimed parse there.
+        use_alarm = (
+            hasattr(signal, "SIGALRM")
+            and threading.current_thread() is threading.main_thread()
+        )
+        if use_alarm:
+            old_handler = signal.signal(signal.SIGALRM, self._timeout_handler)
+            signal.alarm(PARSER_TIMEOUT)
+        try:
+            self._cached_wikicode = mwparserfromhell.parse(text)
             self._cached_text = text
-            self._cached_wikicode, self.last_parse_timed_out = asyncio.run(self._parse_async(text))
+            self.last_parse_timed_out = False
+        except TimeoutError:
+            self._cached_wikicode = None
+            self._cached_text = text
+            self.last_parse_timed_out = True
+        finally:
+            if use_alarm:
+                signal.alarm(0)
+                signal.signal(signal.SIGALRM, old_handler)
+
         return self._cached_wikicode
 
     def extract_external_links(self, text: str | None) -> list[str] | None: