diff --git a/src/wikiq/__init__.py b/src/wikiq/__init__.py index 723469f..ae1efc0 100755 --- a/src/wikiq/__init__.py +++ b/src/wikiq/__init__.py @@ -875,6 +875,8 @@ class WikiqParser: new_diffs = [] diff_timeouts = [] for i, text in enumerate(row_buffer["text"]): + if self.shutdown_requested: + break diff, timed_out = diff_with_timeout(differ, last_text, text) if timed_out: print(f"WARNING! wikidiff2 timeout for rev: {row_buffer['revid'][i]}. Falling back to default limits.", file=sys.stderr) @@ -882,6 +884,9 @@ class WikiqParser: new_diffs.append(diff) diff_timeouts.append(timed_out) last_text = text + if self.shutdown_requested: + print("Shutdown requested, closing writers...", file=sys.stderr) + break row_buffer["diff"] = [ [ entry diff --git a/test/test_resume.py b/test/test_resume.py index 6bc96c7..b99661d 100644 --- a/test/test_resume.py +++ b/test/test_resume.py @@ -191,22 +191,22 @@ def test_resume_with_partition_namespaces(): def test_resume_file_not_found(): - """Test that --resume exits with error when output file doesn't exist.""" + """Test that --resume starts fresh when output file doesn't exist.""" tester = WikiqTester(SAILORMOON, "resume_not_found", in_compression="7z", out_format="parquet") expected_output = os.path.join(tester.output, f"{SAILORMOON}.parquet") if os.path.exists(expected_output): os.remove(expected_output) - try: - tester.call_wikiq("--resume") - pytest.fail("Expected error when --resume is used but output file doesn't exist") - except subprocess.CalledProcessError as exc: - stderr = exc.stderr.decode("utf8") - assert "Error: --resume specified but output file not found" in stderr, \ - f"Expected error message about missing output file, got: {stderr}" + # Should succeed by starting fresh + tester.call_wikiq("--resume") - print("Resume file not found test passed!") + # Verify output was created + assert os.path.exists(expected_output), "Output file should be created when starting fresh" + table = pq.read_table(expected_output) + assert table.num_rows > 0, "Output should have data" + + print("Resume file not found test passed - started fresh!") def test_resume_simple():