Add logic for resuming after an interrupted resume.

This commit is contained in:
Nathan TeBlunthuis
2025-12-10 19:26:54 -08:00
parent d1fc094c96
commit f427291fd8
3 changed files with 132 additions and 16 deletions

View File

@@ -34,6 +34,7 @@ from wikiq.resume import (
setup_resume_temp_output,
finalize_resume_merge,
get_checkpoint_path,
cleanup_interrupted_resume,
)
TO_ENCODE = ("title", "editor")
@@ -1227,6 +1228,8 @@ def main():
resume_point = None
if args.resume:
if output_parquet and not args.stdout:
# First, merge any leftover temp files from a previous interrupted run
cleanup_interrupted_resume(output_file, args.partition_namespaces)
resume_point = get_resume_point(output_file, args.partition_namespaces)
if resume_point is not None:
if args.partition_namespaces: