more work on resuming.

This commit is contained in:
Nathan TeBlunthuis
2025-12-10 21:07:52 -08:00
parent c3d31b4ab5
commit 6b4f3939a5
5 changed files with 512 additions and 532 deletions

View File

@@ -76,14 +76,17 @@ def cleanup_interrupted_resume(output_file, partition_namespaces):
# Temp file was invalid, just remove it
os.remove(temp_output_file)
elif merged == "temp_only":
# Original was corrupted, use temp as new base
os.remove(output_file)
# Original was corrupted or missing, use temp as new base
if os.path.exists(output_file):
os.remove(output_file)
os.rename(temp_output_file, output_file)
print("Recovered from temp file (original was corrupted).", file=sys.stderr)
print("Recovered from temp file (original was corrupted or missing).", file=sys.stderr)
elif merged == "both_invalid":
# Both files corrupted, remove both and start fresh
os.remove(output_file)
os.remove(temp_output_file)
# Both files corrupted or missing, remove both and start fresh
if os.path.exists(output_file):
os.remove(output_file)
if os.path.exists(temp_output_file):
os.remove(temp_output_file)
# Also remove stale checkpoint file
checkpoint_path = get_checkpoint_path(output_file, partition_namespaces)
if os.path.exists(checkpoint_path):