Fix interruption handling by breaking out of the diff loop when shutdown is requested.
This commit is contained in:
@@ -875,6 +875,8 @@ class WikiqParser:
|
|||||||
new_diffs = []
|
new_diffs = []
|
||||||
diff_timeouts = []
|
diff_timeouts = []
|
||||||
for i, text in enumerate(row_buffer["text"]):
|
for i, text in enumerate(row_buffer["text"]):
|
||||||
|
if self.shutdown_requested:
|
||||||
|
break
|
||||||
diff, timed_out = diff_with_timeout(differ, last_text, text)
|
diff, timed_out = diff_with_timeout(differ, last_text, text)
|
||||||
if timed_out:
|
if timed_out:
|
||||||
print(f"WARNING! wikidiff2 timeout for rev: {row_buffer['revid'][i]}. Falling back to default limits.", file=sys.stderr)
|
print(f"WARNING! wikidiff2 timeout for rev: {row_buffer['revid'][i]}. Falling back to default limits.", file=sys.stderr)
|
||||||
@@ -882,6 +884,9 @@ class WikiqParser:
|
|||||||
new_diffs.append(diff)
|
new_diffs.append(diff)
|
||||||
diff_timeouts.append(timed_out)
|
diff_timeouts.append(timed_out)
|
||||||
last_text = text
|
last_text = text
|
||||||
|
if self.shutdown_requested:
|
||||||
|
print("Shutdown requested, closing writers...", file=sys.stderr)
|
||||||
|
break
|
||||||
row_buffer["diff"] = [
|
row_buffer["diff"] = [
|
||||||
[
|
[
|
||||||
entry
|
entry
|
||||||
|
|||||||
@@ -191,22 +191,22 @@ def test_resume_with_partition_namespaces():
|
|||||||
|
|
||||||
|
|
||||||
def test_resume_file_not_found():
|
def test_resume_file_not_found():
|
||||||
"""Test that --resume exits with error when output file doesn't exist."""
|
"""Test that --resume starts fresh when output file doesn't exist."""
|
||||||
tester = WikiqTester(SAILORMOON, "resume_not_found", in_compression="7z", out_format="parquet")
|
tester = WikiqTester(SAILORMOON, "resume_not_found", in_compression="7z", out_format="parquet")
|
||||||
|
|
||||||
expected_output = os.path.join(tester.output, f"{SAILORMOON}.parquet")
|
expected_output = os.path.join(tester.output, f"{SAILORMOON}.parquet")
|
||||||
if os.path.exists(expected_output):
|
if os.path.exists(expected_output):
|
||||||
os.remove(expected_output)
|
os.remove(expected_output)
|
||||||
|
|
||||||
try:
|
# Should succeed by starting fresh
|
||||||
tester.call_wikiq("--resume")
|
tester.call_wikiq("--resume")
|
||||||
pytest.fail("Expected error when --resume is used but output file doesn't exist")
|
|
||||||
except subprocess.CalledProcessError as exc:
|
|
||||||
stderr = exc.stderr.decode("utf8")
|
|
||||||
assert "Error: --resume specified but output file not found" in stderr, \
|
|
||||||
f"Expected error message about missing output file, got: {stderr}"
|
|
||||||
|
|
||||||
print("Resume file not found test passed!")
|
# Verify output was created
|
||||||
|
assert os.path.exists(expected_output), "Output file should be created when starting fresh"
|
||||||
|
table = pq.read_table(expected_output)
|
||||||
|
assert table.num_rows > 0, "Output should have data"
|
||||||
|
|
||||||
|
print("Resume file not found test passed - started fresh!")
|
||||||
|
|
||||||
|
|
||||||
def test_resume_simple():
|
def test_resume_simple():
|
||||||
|
|||||||
Reference in New Issue
Block a user