add logic for resuming after a resume.
This commit is contained in:
@@ -1043,3 +1043,43 @@ def test_resume_simple():
|
||||
assert_frame_equal(resumed_df, full_df, check_like=True, check_dtype=False)
|
||||
|
||||
print(f"Resume simple test passed! Original: {len(full_df)} rows, Resumed: {len(resumed_df)} rows")
|
||||
|
||||
|
||||
def test_resume_merge_with_invalid_temp_file():
    """Verify that a resume merge tolerates a temp file that is not valid parquet.

    A namespace with no records after the resume point can leave behind a
    temp file that was created but never written to. This test simulates that
    with a zero-byte file and checks that the merge reports ``None``, leaves
    the original file intact, and produces no merged output.
    """
    import tempfile

    import pyarrow as pa
    import pyarrow.parquet as pq
    from wikiq.resume import merge_parquet_files, merge_partitioned_namespaces

    with tempfile.TemporaryDirectory() as workdir:
        source_file, scratch_file, output_file = (
            os.path.join(workdir, filename)
            for filename in ("original.parquet", "temp.parquet", "merged.parquet")
        )

        # A well-formed three-row parquet file plays the role of the original output.
        seed_rows = pa.table({"articleid": [1, 2, 3], "revid": [10, 20, 30]})
        pq.write_table(seed_rows, source_file)

        # A zero-byte file stands in for the invalid temp file (not valid parquet).
        with open(scratch_file, 'w') as handle:
            handle.write("")

        # merge_parquet_files is expected to return None for the invalid temp file.
        outcome = merge_parquet_files(source_file, scratch_file, output_file)
        assert outcome is None, "Expected None when temp file is invalid"

        # The original file must survive the failed merge with its rows intact.
        assert os.path.exists(source_file), "Original file should still exist"
        reloaded = pq.read_table(source_file)
        assert len(reloaded) == 3, "Original file should be unchanged"

        # No merged output may be left behind.
        assert not os.path.exists(output_file), "Merged file should not be created"

    print("Resume merge with invalid temp file test passed!")
|
||||
|
||||
Reference in New Issue
Block a user