76 lines
2.6 KiB
Python
76 lines
2.6 KiB
Python
import os
import shlex
import shutil
import subprocess
from typing import Final, Union
|
|
|
|
# Absolute, symlink-resolved directory that contains this module; every other
# path constant below is derived from it.
TEST_DIR: Final[str] = os.path.dirname(os.path.realpath(__file__))
# Path of the wikiq script, one level above TEST_DIR. Built component-wise so
# no path separator is hard-coded (identical to the old value on POSIX).
WIKIQ: Final[str] = os.path.join(TEST_DIR, "..", "src", "wikiq", "__init__.py")
# Directory under TEST_DIR named "test_output".
TEST_OUTPUT_DIR: Final[str] = os.path.join(TEST_DIR, "test_output")
# Directory under TEST_DIR named "baseline_output".
BASELINE_DIR: Final[str] = os.path.join(TEST_DIR, "baseline_output")

# Dump identifiers.
# NOTE(review): presumably passed as the `wiki` argument of WikiqTester, which
# expands them to "<TEST_DIR>/dumps/<name>.xml.<compression>" - confirm
# against the callers.
IKWIKI: Final[str] = "ikwiki-20180301-pages-meta-history"
SAILORMOON: Final[str] = "sailormoon"
TWINPEAKS: Final[str] = "twinpeaks"
REGEXTEST: Final[str] = "regextest"
|
|
|
|
|
|
class WikiqTester:
    """
    Computes the file paths for one wikiq test case and runs wikiq on them.

    Constructing an instance has side effects: anything already present at
    the computed output path is deleted, and for the ``"parquet"`` output
    format an empty directory is created at that path.

    Attributes set by ``__init__``:

    * ``input_file`` -- ``<TEST_DIR>/dumps/<wiki>.xml.<in_compression>``
    * ``output`` --
      ``<TEST_OUTPUT_DIR>/<case_name>_<wiki>[_<suffix>].<out_format>``
    * ``wikiq_baseline_name`` -- ``<wiki>[_<suffix>].<baseline_format>``
    * ``wikiq_out_name`` -- ``<wiki>[_<suffix>].<out_format>``
    * ``baseline_file`` -- ``<BASELINE_DIR>/<case_name>_<wikiq_baseline_name>``;
      only assigned when ``case_name`` is not ``None``.
    """

    def __init__(
        self,
        wiki: str,
        case_name: str,
        suffix: Union[str, None] = None,
        in_compression: str = "bz2",
        baseline_format: str = "tsv",
        out_format: str = "tsv",
    ):
        """
        Derives all paths for the test case and clears any stale output.

        :param wiki: Base name of the dump file under ``<TEST_DIR>/dumps``.
        :param case_name: Prefix of the output and baseline file names.
        :param suffix: Optional extra name component; ``None`` and the empty
            string both mean "no suffix".
        :param in_compression: Extension appended after ``.xml`` on the input.
        :param baseline_format: Extension of the baseline file.
        :param out_format: Extension of the output path; ``"parquet"`` makes
            the output path a directory instead of a file.
        """
        self.input_file = os.path.join(
            TEST_DIR, "dumps", "{0}.xml.{1}".format(wiki, in_compression)
        )

        # One helper decides whether a suffix is present, so the output path
        # and the baseline/out names can never disagree about it.
        basename = self._with_suffix("{0}_{1}".format(case_name, wiki), suffix)
        wiki_stem = self._with_suffix(wiki, suffix)

        self.output = os.path.join(
            TEST_OUTPUT_DIR, "{0}.{1}".format(basename, out_format)
        )

        # Remove leftovers of a previous run; the path may be a plain file or
        # a directory (the parquet case below creates one).
        if os.path.isfile(self.output):
            os.remove(self.output)
        elif os.path.exists(self.output):
            shutil.rmtree(self.output)

        if out_format == "parquet":
            os.makedirs(self.output, exist_ok=True)

        self.wikiq_baseline_name = "{0}.{1}".format(wiki_stem, baseline_format)
        self.wikiq_out_name = "{0}.{1}".format(wiki_stem, out_format)

        # Without a case name there is no baseline to point at, and the
        # attribute is deliberately left unset (unchanged behaviour).
        if case_name is not None:
            self.baseline_file = os.path.join(
                BASELINE_DIR, "{0}_{1}".format(case_name, self.wikiq_baseline_name)
            )

    @staticmethod
    def _with_suffix(stem: str, suffix: Union[str, None]) -> str:
        """Returns ``stem`` with ``_<suffix>`` appended when a suffix is given."""
        if suffix:
            return "{0}_{1}".format(stem, suffix)
        return stem

    @staticmethod
    def _shell_command(fixed_parts, raw_args) -> str:
        """
        Builds the shell command line: ``fixed_parts`` are shell-quoted,
        ``raw_args`` are appended exactly as given.
        """
        return " ".join([*(shlex.quote(part) for part in fixed_parts), *raw_args])

    def call_wikiq(self, *args: str, out: bool = True) -> bytes:
        """
        Calls wikiq with the passed arguments on the input file relevant to the test.

        The script path, the input file and the output path are shell-quoted,
        so directories containing spaces or shell metacharacters work. The
        extra arguments are passed through verbatim: the command runs through
        the shell, so one string may hold several options or its own quoting.

        :param args: The command line arguments to pass to wikiq.
        :param out: Whether to pass an output argument to wikiq.
        :return: The output of the wikiq call.
        :raises subprocess.CalledProcessError: If the command exits with a
            non-zero status; the captured stderr is attached to the exception.
        """
        fixed_parts = [WIKIQ, self.input_file]
        if out:
            fixed_parts.extend(["-o", self.output])
        fixed_parts.extend(["--batch-size", "10"])

        call = self._shell_command(fixed_parts, args)

        print(call)
        return subprocess.check_output(call, stderr=subprocess.PIPE, shell=True)
|