Try fixing coroutine issue.

This commit is contained in:
Nathan TeBlunthuis 2025-08-07 08:58:45 -07:00
parent 9b3237014d
commit 19f67b3679

View File

@@ -30,7 +30,7 @@ from wikiq.wiki_diff_matcher import WikiDiffMatcher
TO_ENCODE = ("title", "editor") TO_ENCODE = ("title", "editor")
PERSISTENCE_RADIUS = 7 PERSISTENCE_RADIUS = 7
DIFF_TIMEOUT = 60*20 DIFF_TIMEOUT = 60
from pathlib import Path from pathlib import Path
import pyarrow as pa import pyarrow as pa
@@ -52,8 +52,9 @@ async def diff_async(differ, last_text, text):
return differ.inline_json_diff(last_text, text) return differ.inline_json_diff(last_text, text)
try: try:
result = await asyncio.wait_for(_diff(), DIFF_TIMEOUT) result = await asyncio.wait_for(_diff(), DIFF_TIMEOUT)
except TimeoutError as e: except TimeoutError:
raise e print(f"WARNING! wikidiff2 timeout for rev: {row_buffer['revid'][i]}. Falling back to default limits.", file=sys.stderr)
return
return result return result
def calculate_persistence(tokens_added): def calculate_persistence(tokens_added):
@@ -677,10 +678,8 @@ class WikiqParser:
last_text = last_rev_text last_text = last_rev_text
new_diffs = [] new_diffs = []
for i, text in enumerate(row_buffer["text"]): for i, text in enumerate(row_buffer["text"]):
try:
diff = asyncio.run(diff_async(differ, last_text, text)) diff = asyncio.run(diff_async(differ, last_text, text))
except TimeoutError: if diff is None:
print(f"WARNING! wikidiff2 timeout for rev: {row_buffer['revid'][i]}. Falling back to default limits.", file=sys.stderr)
diff = fast_differ.inline_json_diff(last_text, text) diff = fast_differ.inline_json_diff(last_text, text)
new_diffs.append(diff) new_diffs.append(diff)
last_text = text last_text = text