Don't put checkpoint files inside namespace directories.

This commit is contained in:
Nathan TeBlunthuis
2025-12-07 06:24:04 -08:00
parent 783f5fd8bc
commit d1fc094c96
2 changed files with 18 additions and 6 deletions

View File

@@ -322,7 +322,7 @@ class WikiqParser:
"""Open checkpoint file for writing. Keeps file open for performance."""
if not self.output_parquet or output_file == sys.stdout.buffer:
return
checkpoint_path = get_checkpoint_path(output_file)
checkpoint_path = get_checkpoint_path(output_file, self.partition_namespaces)
Path(checkpoint_path).parent.mkdir(parents=True, exist_ok=True)
self.checkpoint_file = open(checkpoint_path, 'w')
print(f"Checkpoint file opened: {checkpoint_path}", file=sys.stderr)