Start fresh if both the output and the resume point are broken.

This commit is contained in:
Nathan TeBlunthuis 2025-12-10 21:20:52 -08:00
parent 6b4f3939a5
commit 1001c780fa

View File

@@ -1248,11 +1248,32 @@ def main():
pageid, revid = resume_point
print(f"Resuming from last written point: pageid={pageid}, revid={revid}", file=sys.stderr)
else:
# resume_point is None - check if file exists but is corrupt
if args.partition_namespaces:
partition_dir = os.path.dirname(output_file)
sys.exit(f"Error: --resume specified but partitioned output not found in: {partition_dir}")
output_filename = os.path.basename(output_file)
corrupt_files = []
if os.path.isdir(partition_dir):
for d in os.listdir(partition_dir):
if d.startswith('namespace='):
filepath = os.path.join(partition_dir, d, output_filename)
if os.path.exists(filepath):
corrupt_files.append(filepath)
if corrupt_files:
print("Output files exist but are corrupt, deleting and starting fresh.", file=sys.stderr)
for filepath in corrupt_files:
os.remove(filepath)
start_fresh = True
else:
sys.exit(f"Error: --resume specified but partitioned output not found in: {partition_dir}")
else:
sys.exit(f"Error: --resume specified but output file not found: {output_file}")
if os.path.exists(output_file):
# File exists but is corrupt - start fresh
print(f"Output file {output_file} exists but is corrupt, starting fresh.", file=sys.stderr)
os.remove(output_file)
start_fresh = True
else:
sys.exit(f"Error: --resume specified but output file not found: {output_file}")
else:
sys.exit("Error: --resume only works with parquet output (not stdout or TSV)")