Fix interruption handling by breaking out of the diff loop when shutdown is requested.
This commit is contained in:
@@ -875,6 +875,8 @@ class WikiqParser:
|
||||
new_diffs = []
|
||||
diff_timeouts = []
|
||||
for i, text in enumerate(row_buffer["text"]):
|
||||
if self.shutdown_requested:
|
||||
break
|
||||
diff, timed_out = diff_with_timeout(differ, last_text, text)
|
||||
if timed_out:
|
||||
print(f"WARNING! wikidiff2 timeout for rev: {row_buffer['revid'][i]}. Falling back to default limits.", file=sys.stderr)
|
||||
@@ -882,6 +884,9 @@ class WikiqParser:
|
||||
new_diffs.append(diff)
|
||||
diff_timeouts.append(timed_out)
|
||||
last_text = text
|
||||
if self.shutdown_requested:
|
||||
print("Shutdown requested, closing writers...", file=sys.stderr)
|
||||
break
|
||||
row_buffer["diff"] = [
|
||||
[
|
||||
entry
|
||||
|
||||
@@ -191,22 +191,22 @@ def test_resume_with_partition_namespaces():
|
||||
|
||||
|
||||
def test_resume_file_not_found():
|
||||
"""Test that --resume exits with error when output file doesn't exist."""
|
||||
"""Test that --resume starts fresh when output file doesn't exist."""
|
||||
tester = WikiqTester(SAILORMOON, "resume_not_found", in_compression="7z", out_format="parquet")
|
||||
|
||||
expected_output = os.path.join(tester.output, f"{SAILORMOON}.parquet")
|
||||
if os.path.exists(expected_output):
|
||||
os.remove(expected_output)
|
||||
|
||||
try:
|
||||
# Should succeed by starting fresh
|
||||
tester.call_wikiq("--resume")
|
||||
pytest.fail("Expected error when --resume is used but output file doesn't exist")
|
||||
except subprocess.CalledProcessError as exc:
|
||||
stderr = exc.stderr.decode("utf8")
|
||||
assert "Error: --resume specified but output file not found" in stderr, \
|
||||
f"Expected error message about missing output file, got: {stderr}"
|
||||
|
||||
print("Resume file not found test passed!")
|
||||
# Verify output was created
|
||||
assert os.path.exists(expected_output), "Output file should be created when starting fresh"
|
||||
table = pq.read_table(expected_output)
|
||||
assert table.num_rows > 0, "Output should have data"
|
||||
|
||||
print("Resume file not found test passed - started fresh!")
|
||||
|
||||
|
||||
def test_resume_simple():
|
||||
|
||||
Reference in New Issue
Block a user