Start fresh if output and resume are both broken.

This commit is contained in:
Nathan TeBlunthuis 2025-12-10 21:20:52 -08:00
parent 6b4f3939a5
commit 1001c780fa

View File

@@ -1248,9 +1248,30 @@ def main():
pageid, revid = resume_point pageid, revid = resume_point
print(f"Resuming from last written point: pageid={pageid}, revid={revid}", file=sys.stderr) print(f"Resuming from last written point: pageid={pageid}, revid={revid}", file=sys.stderr)
else: else:
# resume_point is None - check if file exists but is corrupt
if args.partition_namespaces: if args.partition_namespaces:
partition_dir = os.path.dirname(output_file) partition_dir = os.path.dirname(output_file)
output_filename = os.path.basename(output_file)
corrupt_files = []
if os.path.isdir(partition_dir):
for d in os.listdir(partition_dir):
if d.startswith('namespace='):
filepath = os.path.join(partition_dir, d, output_filename)
if os.path.exists(filepath):
corrupt_files.append(filepath)
if corrupt_files:
print("Output files exist but are corrupt, deleting and starting fresh.", file=sys.stderr)
for filepath in corrupt_files:
os.remove(filepath)
start_fresh = True
else:
sys.exit(f"Error: --resume specified but partitioned output not found in: {partition_dir}") sys.exit(f"Error: --resume specified but partitioned output not found in: {partition_dir}")
else:
if os.path.exists(output_file):
# File exists but is corrupt - start fresh
print(f"Output file {output_file} exists but is corrupt, starting fresh.", file=sys.stderr)
os.remove(output_file)
start_fresh = True
else: else:
sys.exit(f"Error: --resume specified but output file not found: {output_file}") sys.exit(f"Error: --resume specified but output file not found: {output_file}")
else: else: