Don't try to remove files that don't exist.
This commit is contained in:
parent
70a10db228
commit
5d1a246898
@ -292,10 +292,13 @@ def merge_parquet_files(original_path, temp_path, merged_path):
|
||||
print(f"Warning: Original file {original_path} is corrupted or invalid: {e}", file=sys.stderr)
|
||||
|
||||
try:
|
||||
temp_pq = pq.ParquetFile(temp_path)
|
||||
temp_valid = True
|
||||
if not os.path.exists(temp_path):
|
||||
print(f"Note: Temp file {temp_path} does not exist (namespace had no records after resume point)", file=sys.stderr)
|
||||
else:
|
||||
temp_pq = pq.ParquetFile(temp_path)
|
||||
temp_valid = True
|
||||
except Exception:
|
||||
print(f"Note: No new data in temp file {temp_path} (namespace had no records after resume point)", file=sys.stderr)
|
||||
print(f"Note: No new data in temp file {temp_path} (file exists but is invalid)", file=sys.stderr)
|
||||
|
||||
if not original_valid and not temp_valid:
|
||||
print(f"Both original and temp files are invalid, will start fresh", file=sys.stderr)
|
||||
@ -380,33 +383,40 @@ def merge_partitioned_namespaces(partition_dir, temp_suffix):
|
||||
|
||||
if merged == "original_only":
|
||||
# Temp file was invalid (no new data), keep original unchanged
|
||||
os.remove(temp_path)
|
||||
if os.path.exists(temp_path):
|
||||
os.remove(temp_path)
|
||||
elif merged == "temp_only":
|
||||
# Original was corrupted, use temp as new base
|
||||
os.remove(original_path)
|
||||
os.rename(temp_path, original_path)
|
||||
elif merged == "both_invalid":
|
||||
# Both files corrupted, remove both
|
||||
os.remove(original_path)
|
||||
os.remove(temp_path)
|
||||
if os.path.exists(original_path):
|
||||
os.remove(original_path)
|
||||
if os.path.exists(temp_path):
|
||||
os.remove(temp_path)
|
||||
had_corruption = True
|
||||
elif merged == "merged":
|
||||
# Replace the original file with the merged file
|
||||
os.remove(original_path)
|
||||
os.rename(merged_path, original_path)
|
||||
os.remove(temp_path)
|
||||
if os.path.exists(temp_path):
|
||||
os.remove(temp_path)
|
||||
else:
|
||||
# Both files were empty (False), just remove them
|
||||
os.remove(original_path)
|
||||
os.remove(temp_path)
|
||||
if os.path.exists(original_path):
|
||||
os.remove(original_path)
|
||||
if os.path.exists(temp_path):
|
||||
os.remove(temp_path)
|
||||
else:
|
||||
# No original file, rename temp to original only if valid
|
||||
try:
|
||||
pq.ParquetFile(temp_path)
|
||||
os.rename(temp_path, original_path)
|
||||
except Exception:
|
||||
# Temp file invalid, just remove it
|
||||
os.remove(temp_path)
|
||||
# Temp file invalid or missing, just remove it if it exists
|
||||
if os.path.exists(temp_path):
|
||||
os.remove(temp_path)
|
||||
had_corruption = True
|
||||
|
||||
return had_corruption
|
||||
|
||||
Loading…
Reference in New Issue
Block a user