Refactor and enable JSONL output.
This commit is contained in:
@@ -42,8 +42,20 @@ class WikiqTester:
|
||||
else:
|
||||
shutil.rmtree(self.output)
|
||||
|
||||
if out_format == "parquet":
|
||||
os.makedirs(self.output, exist_ok=True)
|
||||
# Also clean up resume-related files
|
||||
for suffix in [".resume_temp", ".checkpoint", ".merged"]:
|
||||
temp_path = self.output + suffix
|
||||
if os.path.exists(temp_path):
|
||||
if os.path.isfile(temp_path):
|
||||
os.remove(temp_path)
|
||||
else:
|
||||
shutil.rmtree(temp_path)
|
||||
|
||||
# For JSONL and Parquet, self.output is a file path. Create parent directory if needed.
|
||||
if out_format in ("jsonl", "parquet"):
|
||||
parent_dir = os.path.dirname(self.output)
|
||||
if parent_dir:
|
||||
os.makedirs(parent_dir, exist_ok=True)
|
||||
|
||||
if suffix is None:
|
||||
self.wikiq_baseline_name = "{0}.{1}".format(wiki, baseline_format)
|
||||
|
||||
Reference in New Issue
Block a user