Begin adding test for parquet export
Changed the logic for handling anonymous edits so that wikiq uses a consistent type for editor ids. A Parquet column can mix int64 and None, but not int64 and strings; previously the code used the empty string to denote anonymous editors. Tests are failing, so don't merge yet. Signed-off-by: Will Beason <willbeason@gmail.com>
This commit is contained in:
@@ -42,16 +42,21 @@ class WikiqTester:
|
||||
):
|
||||
self.input_file = os.path.join(TEST_DIR, "dumps", "{0}.xml.{1}".format(wiki, in_compression))
|
||||
|
||||
if out_format == "tsv":
|
||||
self.output_dir = TEST_OUTPUT_DIR
|
||||
else:
|
||||
self.output_dir = "{0}.parquet".format(TEST_OUTPUT_DIR)
|
||||
|
||||
if suffix is None:
|
||||
self.wikiq_out_name = "{0}.{1}".format(wiki, out_format)
|
||||
else:
|
||||
self.wikiq_out_name = "{0}_{1}.{2}".format(wiki, suffix, out_format)
|
||||
self.call_output = os.path.join(TEST_OUTPUT_DIR, "{0}.{1}".format(wiki, out_format))
|
||||
self.call_output = os.path.join(self.output_dir, "{0}.{1}".format(wiki, out_format))
|
||||
|
||||
# If case_name is unset, there are no relevant baseline or test files.
|
||||
if case_name is not None:
|
||||
self.baseline_file = os.path.join(BASELINE_DIR, "{0}_{1}".format(case_name, self.wikiq_out_name))
|
||||
self.test_file = os.path.join(TEST_OUTPUT_DIR, "{0}_{1}".format(case_name, self.wikiq_out_name))
|
||||
self.test_file = os.path.join(self.output_dir, "{0}_{1}".format(case_name, self.wikiq_out_name))
|
||||
if os.path.exists(self.test_file):
|
||||
os.remove(self.test_file)
|
||||
|
||||
@@ -63,7 +68,7 @@ class WikiqTester:
|
||||
:return: The output of the wikiq call.
|
||||
"""
|
||||
if out:
|
||||
call = ' '.join([WIKIQ, self.input_file, "-o", TEST_OUTPUT_DIR, *args])
|
||||
call = ' '.join([WIKIQ, self.input_file, "-o", self.output_dir, *args])
|
||||
else:
|
||||
call = ' '.join([WIKIQ, self.input_file, *args])
|
||||
|
||||
@@ -314,6 +319,20 @@ class WikiqTestCase(unittest.TestCase):
|
||||
baseline = pd.read_table(tester.baseline_file)
|
||||
assert_frame_equal(test, baseline, check_like=True)
|
||||
|
||||
def test_parquet(self):
    """Check wikiq's Parquet output against the stored baseline.

    Runs wikiq on the IKWIKI dump through a ``WikiqTester`` configured with
    ``out_format="parquet"``, copies the produced file to the tester's
    ``test_file`` path, and asserts that it holds the same data frame as
    ``baseline_file`` (column order is ignored via ``check_like=True``).

    Fails the test with wikiq's stderr if the wikiq call exits non-zero.
    """
    tester = WikiqTester(IKWIKI, "parquet", out_format="parquet")

    try:
        tester.call_wikiq()
    except subprocess.CalledProcessError as exc:
        self.fail(exc.stderr.decode("utf8"))

    copyfile(tester.call_output, tester.test_file)

    # as a test let's make sure that we get equal data frames
    # The files are Parquet, not TSV, so they must go through the Parquet
    # reader; pd.read_table would try to parse the binary file as text.
    # NOTE(review): assumes the baseline is also stored as Parquet - confirm.
    test = pd.read_parquet(tester.test_file)
    baseline = pd.read_parquet(tester.baseline_file)
    assert_frame_equal(test, baseline, check_like=True)
|
||||
|
||||
# Run the test suite only when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
|
||||
Reference in New Issue
Block a user