create baseline tests for xml dump processing

This commit is contained in:
Nathan TeBlunthuis 2018-07-03 23:43:47 -07:00
parent ba886ecf4c
commit d2746879d0
6 changed files with 16680 additions and 0 deletions

108
test/Wikiq_Unit_Test.py Normal file
View File

@ -0,0 +1,108 @@
import unittest
import os
import sys
from shutil import copyfile
# with / without pwr DONE
# with / without url encode DONE
# with / without collapse user DONE
# with output to stdout
# note that the persistence radius is 7 by default
# reading various file formats including
# 7z, gz, bz2, xml
# wikia and wikipedia data
# malformed xmls
class Test_Basic(unittest.TestCase):
    """Baseline regression tests for the ``wikiq`` command-line tool.

    Each test runs ``../wikiq`` on the ``sailormoon`` dump with one set of
    flags, keeps a copy of the resulting TSV under a flag-specific name, and
    compares that copy line by line with the file of the same name stored in
    ``baseline_output``.  All paths are relative to the current working
    directory.
    """

    def setUp(self):
        """Create the output directory and compute the paths shared by all tests."""
        self.wiki = 'sailormoon'
        self.wikiq_out_name = self.wiki + ".tsv"
        self.test_output_dir = os.path.join(".", "test_output")
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(self.test_output_dir, exist_ok=True)
        # File that wikiq is expected to write for this dump; every test
        # reads its result from this same path.
        self.call_output = os.path.join(self.test_output_dir, self.wikiq_out_name)
        self.infile = "{0}.xml.7z".format(self.wiki)
        self.base_call = "../wikiq {0} -o {1}"
        self.input_dir = "dumps"
        self.input_file = os.path.join(".", self.input_dir, self.infile)
        self.baseline_output_dir = "baseline_output"

    def _run_and_compare(self, prefix, extra_args=""):
        """Run wikiq with *extra_args* and compare its output to the baseline.

        Args:
            prefix: Name prefix of both the saved copy of the output and the
                baseline file, for example ``"noargs_"``.
            extra_args: Text appended verbatim to the base command line,
                including its leading space, for example ``" --persistence"``.

        Raises:
            AssertionError: If the command exits with a non-zero status, or
                if the output and the baseline differ in any line or in
                their number of lines.
        """
        from itertools import zip_longest

        # All tests share one output path.  Remove whatever an earlier test
        # left there so a failed run cannot be mistaken for a fresh result.
        try:
            os.remove(self.call_output)
        except FileNotFoundError:
            pass

        call = self.base_call.format(self.input_file, self.test_output_dir) + extra_args
        status = os.system(call)
        self.assertEqual(
            status, 0,
            "command exited with status {0}: {1}".format(status, call))

        test_file = prefix + self.wikiq_out_name
        test_path = os.path.join(self.test_output_dir, test_file)
        # Keep a per-test copy so the output survives the next test's run.
        copyfile(self.call_output, test_path)
        baseline_path = os.path.join(".", self.baseline_output_dir, test_file)

        # "with" closes both files even when an assertion fails mid-loop.
        with open(test_path) as test_lines, open(baseline_path) as baseline_lines:
            # zip_longest pads the shorter file with None, so a truncated or
            # over-long output fails instead of being silently ignored the
            # way it would be with zip().
            pairs = zip_longest(test_lines, baseline_lines)
            for lineno, (test, baseline) in enumerate(pairs, start=1):
                self.assertEqual(
                    test, baseline,
                    "line {0} of {1} does not match the baseline".format(
                        lineno, test_file))

    def test_noargs(self):
        """Check the output of wikiq run without extra flags."""
        self._run_and_compare("noargs_")

    def test_collapse_user(self):
        """Check the output of wikiq run with ``--collapse-user``."""
        self._run_and_compare("collapse-user_", " --collapse-user")

    def test_pwr(self):
        """Check the output of wikiq run with ``--persistence``."""
        self._run_and_compare("persistence_", " --persistence")

    def test_url_encode(self):
        """Check the output of wikiq run with ``--url-encode``."""
        self._run_and_compare("url-encode_", " --url-encode")
# Run the test suite only when this file is executed as a script.
if '__main__' == __name__:
    unittest.main()

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

Binary file not shown.