From f3e6cc939266dacf400b19ccaef9c666fadcfbb8 Mon Sep 17 00:00:00 2001 From: Will Beason Date: Wed, 28 May 2025 09:11:36 -0500 Subject: [PATCH] Begin refactor of tests to make new tests easier to write Handle file naming logic centrally rather than requiring a dedicated class per input file. Signed-off-by: Will Beason --- test/Wikiq_Unit_Test.py | 67 +++++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 29 deletions(-) diff --git a/test/Wikiq_Unit_Test.py b/test/Wikiq_Unit_Test.py index eae8020..1258724 100644 --- a/test/Wikiq_Unit_Test.py +++ b/test/Wikiq_Unit_Test.py @@ -14,6 +14,7 @@ WIKIQ: Final[str] = os.path.join(os.path.dirname(TEST_DIR), "wikiq") TEST_OUTPUT_DIR: Final[str] = os.path.join(TEST_DIR, "test_output") BASELINE_DIR: Final[str] = os.path.join(TEST_DIR, "baseline_output") +IKWIKI: Final[str] = "ikwiki-20180301-pages-meta-history" def setup(): tracemalloc.start() @@ -34,6 +35,23 @@ def setup(): setup() +class WikiqTester: + def __init__(self, + wiki: str, + case_name: str, + out_format: str = "tsv", + ): + self.input_file = os.path.join(TEST_DIR, "dumps", "{0}.xml.bz2".format(wiki)) + + self.wikiq_out_name = "{0}.{1}".format(wiki, out_format) + self.call_output = os.path.join(TEST_OUTPUT_DIR, self.wikiq_out_name) + + self.baseline_file = os.path.join(BASELINE_DIR, "{0}_{1}".format(case_name, self.wikiq_out_name)) + self.test_file = os.path.join(TEST_OUTPUT_DIR, "{0}_{1}".format(case_name, self.wikiq_out_name)) + if os.path.exists(self.test_file): + os.remove(self.test_file) + + def call_wikiq(input_file: str, *args: str, out: bool = True): if out: call = ' '.join([WIKIQ, input_file, "-o", TEST_OUTPUT_DIR, *args]) @@ -69,67 +87,58 @@ def tmp_test_file(name: str) -> (str, str): # wikia and wikipedia data DONE # malformed xmls DONE -class Test_Wikipedia(unittest.TestCase): - def setUp(self): - wiki = 'ikwiki-20180301-pages-meta-history' - self.wikiq_out_name = "{0}.tsv".format(wiki) - self.call_output = os.path.join(TEST_OUTPUT_DIR, self.wikiq_out_name) - - infile = "{0}.xml.bz2".format(wiki) - input_dir = os.path.join(TEST_DIR, "dumps") - self.input_file = os.path.join(TEST_DIR, input_dir, infile) - +class TestWikipedia(unittest.TestCase): def test_WP_url_encode(self): - baseline_file, test_file = tmp_test_file("url-encode_" + self.wikiq_out_name) + tester = WikiqTester(IKWIKI, "url-encode") try: - call_wikiq(self.input_file, "--url-encode") + call_wikiq(tester.input_file, "--url-encode") except subprocess.CalledProcessError as exc: self.fail(exc.stderr.decode("utf8")) - copyfile(self.call_output, test_file) + copyfile(tester.call_output, tester.test_file) # as a test let's make sure that we get equal data frames - test = pd.read_table(test_file) - baseline = pd.read_table(baseline_file) + test = pd.read_table(tester.test_file) + baseline = pd.read_table(tester.baseline_file) assert_frame_equal(test, baseline, check_like=True) def test_WP_namespaces(self): - baseline_file, test_file = tmp_test_file("namespaces_" + self.wikiq_out_name) + tester = WikiqTester(IKWIKI, "namespaces") try: - call_wikiq(self.input_file, "-n 0", "-n 1") + call_wikiq(tester.input_file, "-n 0", "-n 1") except subprocess.CalledProcessError as exc: self.fail(exc.stderr.decode("utf8")) - copyfile(self.call_output, test_file) + copyfile(tester.call_output, tester.test_file) # as a test let's make sure that we get equal data frames - test = pd.read_table(test_file) + test = pd.read_table(tester.test_file) num_wrong_ns = sum(~ test.namespace.isin({0, 1})) self.assertEqual(num_wrong_ns, 0) - baseline = pd.read_table(baseline_file) + baseline = pd.read_table(tester.baseline_file) assert_frame_equal(test, baseline, check_like=True) def test_WP_revert_radius(self): - baseline_file, test_file = tmp_test_file("revert_radius_" + self.wikiq_out_name) + tester = WikiqTester(IKWIKI, "revert_radius") try: - call_wikiq(self.input_file, "-n 0", "-n 1", "-rr 1") + call_wikiq(tester.input_file, "-n 0", "-n 1", "-rr 1") except subprocess.CalledProcessError as exc: self.fail(exc.stderr.decode("utf8")) - copyfile(self.call_output, test_file) + copyfile(tester.call_output, tester.test_file) # as a test let's make sure that we get equal data frames - test = pd.read_table(test_file) + test = pd.read_table(tester.test_file) num_wrong_ns = sum(~ test.namespace.isin({0, 1})) self.assertEqual(num_wrong_ns, 0) - baseline = pd.read_table(baseline_file) + baseline = pd.read_table(tester.baseline_file) assert_frame_equal(test, baseline, check_like=True) -class Test_Basic(unittest.TestCase): +class TestBasic(unittest.TestCase): def setUp(self): wiki = 'sailormoon' @@ -228,7 +237,7 @@ class Test_Basic(unittest.TestCase): assert_frame_equal(test, baseline, check_like=True) -class Test_Malformed(unittest.TestCase): +class TestMalformed(unittest.TestCase): def setUp(self): wiki = 'twinpeaks' @@ -248,7 +257,7 @@ class Test_Malformed(unittest.TestCase): self.fail("No exception raised, want: {}".format(want_exception)) -class Test_Stdout(unittest.TestCase): +class TestStdout(unittest.TestCase): def setUp(self): wiki = 'sailormoon' @@ -276,7 +285,7 @@ class Test_Stdout(unittest.TestCase): assert_frame_equal(test, baseline, check_like=True) -class Test_Regex(unittest.TestCase): +class TestRegex(unittest.TestCase): def setUp(self): wiki = 'regextest' self.wikiq_out_name = wiki + '.tsv'