Fix interruption handling by breaking out of the diff loop.

This commit is contained in:
Nathan TeBlunthuis
2025-12-18 18:00:30 -08:00
parent d7f5abef2d
commit 006feb795c
2 changed files with 14 additions and 9 deletions

View File

@@ -875,6 +875,8 @@ class WikiqParser:
new_diffs = []
diff_timeouts = []
for i, text in enumerate(row_buffer["text"]):
if self.shutdown_requested:
break
diff, timed_out = diff_with_timeout(differ, last_text, text)
if timed_out:
print(f"WARNING! wikidiff2 timeout for rev: {row_buffer['revid'][i]}. Falling back to default limits.", file=sys.stderr)
@@ -882,6 +884,9 @@ class WikiqParser:
new_diffs.append(diff)
diff_timeouts.append(timed_out)
last_text = text
if self.shutdown_requested:
print("Shutdown requested, closing writers...", file=sys.stderr)
break
row_buffer["diff"] = [
[
entry

View File

@@ -191,22 +191,22 @@ def test_resume_with_partition_namespaces():
def test_resume_file_not_found():
"""Test that --resume exits with error when output file doesn't exist."""
"""Test that --resume starts fresh when output file doesn't exist."""
tester = WikiqTester(SAILORMOON, "resume_not_found", in_compression="7z", out_format="parquet")
expected_output = os.path.join(tester.output, f"{SAILORMOON}.parquet")
if os.path.exists(expected_output):
os.remove(expected_output)
try:
tester.call_wikiq("--resume")
pytest.fail("Expected error when --resume is used but output file doesn't exist")
except subprocess.CalledProcessError as exc:
stderr = exc.stderr.decode("utf8")
assert "Error: --resume specified but output file not found" in stderr, \
f"Expected error message about missing output file, got: {stderr}"
# Should succeed by starting fresh
tester.call_wikiq("--resume")
print("Resume file not found test passed!")
# Verify output was created
assert os.path.exists(expected_output), "Output file should be created when starting fresh"
table = pq.read_table(expected_output)
assert table.num_rows > 0, "Output should have data"
print("Resume file not found test passed - started fresh!")
def test_resume_simple():