Remove resource leaks from tests

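Run each test subprocess inside a `with subprocess.Popen(...)` block so that its pipes are closed when the block exits, which fixes the resource leak warning. The tests now also assert on the subprocess return code.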

Signed-off-by: Will Beason <willbeason@gmail.com>
This commit is contained in:
Will Beason 2025-05-26 15:08:47 -05:00
parent 09a84e7d11
commit 6d133575c7
2 changed files with 60 additions and 41 deletions

View File

@ -44,8 +44,10 @@ class Test_Wikipedia(unittest.TestCase):
call = self.base_call.format(self.input_file, self.test_output_dir)
call = call + " --url-encode"
proc = subprocess.Popen(call, stdout=subprocess.PIPE, shell=True)
proc.wait()
print(call)
with subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) as proc:
proc.wait()
assert (proc.returncode == 0)
copyfile(self.call_output, test_file)
baseline_file = os.path.join(".", self.baseline_output_dir, test_filename)
@ -65,8 +67,10 @@ class Test_Wikipedia(unittest.TestCase):
call = self.base_call.format(self.input_file, self.test_output_dir)
call = call + " -n 0 -n 1"
print(call)
proc = subprocess.Popen(call, stdout=subprocess.PIPE, shell=True)
proc.wait()
with subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) as proc:
proc.wait()
assert (proc.returncode == 0)
copyfile(self.call_output, test_file)
baseline_file = os.path.join(os.path.abspath("."), self.baseline_output_dir, test_filename)
@ -87,8 +91,10 @@ class Test_Wikipedia(unittest.TestCase):
call = self.base_call.format(self.input_file, self.test_output_dir)
call = call + " -n 0 -n 1 -rr 1"
print(call)
proc = subprocess.Popen(call, stdout=subprocess.PIPE, shell=True)
proc.wait()
with subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) as proc:
proc.wait()
assert (proc.returncode == 0)
copyfile(self.call_output, test_file)
baseline_file = os.path.join(os.path.abspath("."), self.baseline_output_dir, test_filename)
@ -125,8 +131,10 @@ class Test_Basic(unittest.TestCase):
os.remove(test_file)
call = self.base_call.format(self.input_file, self.test_output_dir)
proc = subprocess.Popen(call, stdout=subprocess.PIPE, shell=True)
proc.wait()
print(call)
with subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) as proc:
proc.wait()
assert (proc.returncode == 0)
copyfile(self.call_output, test_file)
@ -144,9 +152,10 @@ class Test_Basic(unittest.TestCase):
call = self.base_call.format(self.input_file, self.test_output_dir)
call = call + " --collapse-user"
proc = subprocess.Popen(call, stdout=subprocess.PIPE, shell=True)
proc.wait()
print(call)
with subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) as proc:
proc.wait()
assert (proc.returncode == 0)
copyfile(self.call_output, test_file)
@ -163,8 +172,10 @@ class Test_Basic(unittest.TestCase):
call = self.base_call.format(self.input_file, self.test_output_dir)
call = call + " --persistence segment"
proc = subprocess.Popen(call, stdout=subprocess.PIPE, shell=True)
proc.wait()
print(call)
with subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) as proc:
proc.wait()
assert (proc.returncode == 0)
copyfile(self.call_output, test_file)
@ -182,8 +193,10 @@ class Test_Basic(unittest.TestCase):
call = self.base_call.format(self.input_file, self.test_output_dir)
call = call + " --persistence legacy"
proc = subprocess.Popen(call, stdout=subprocess.PIPE, shell=True)
proc.wait()
print(call)
with subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) as proc:
proc.wait()
assert (proc.returncode == 0)
copyfile(self.call_output, test_file)
@ -201,8 +214,10 @@ class Test_Basic(unittest.TestCase):
call = self.base_call.format(self.input_file, self.test_output_dir)
call = call + " --persistence"
proc = subprocess.Popen(call, stdout=subprocess.PIPE, shell=True)
proc.wait()
print(call)
with subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) as proc:
proc.wait()
assert (proc.returncode == 0)
copyfile(self.call_output, test_file)
@ -223,8 +238,9 @@ class Test_Basic(unittest.TestCase):
call = self.base_call.format(self.input_file, self.test_output_dir)
call = call + " --url-encode"
proc = subprocess.Popen(call, stdout=subprocess.PIPE, shell=True)
proc.wait()
with subprocess.Popen(call, stdout=subprocess.PIPE, shell=True) as proc:
proc.wait()
assert (proc.returncode == 0)
copyfile(self.call_output, test_file)
baseline_file = os.path.join(".", self.baseline_output_dir, test_filename)
@ -252,11 +268,13 @@ class Test_Malformed(unittest.TestCase):
def test_malformed_noargs(self):
call = self.base_call.format(self.input_file, self.test_output_dir)
proc = subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
proc.wait()
outs, errs = proc.communicate()
errlines = str(errs).split("\\n")
self.assertEqual(errlines[-2], 'xml.etree.ElementTree.ParseError: no element found: line 1369, column 0')
print(call)
with subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) as proc:
proc.wait()
self.assertNotEqual(proc.returncode, 0)
outs, errs = proc.communicate()
errlines = str(errs).split("\\n")
self.assertEqual(errlines[-2], 'xml.etree.ElementTree.ParseError: no element found: line 1369, column 0')
class Test_Stdout(unittest.TestCase):
@ -337,13 +355,12 @@ class Test_Regex(unittest.TestCase):
call = self.base_call.format(self.input_file)
call = call + " --stdout " + input
print(call)
proc = subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
stdout, stderr = proc.communicate()
# print(proc.returncode)
with subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) as proc:
stdout, stderr = proc.communicate()
# we want to check that the bad inputs were caught and sys.exit is stopping the code
print(stderr.decode("utf-8"))
# we want to check that the bad inputs were caught and sys.exit is stopping the code
print(stderr.decode("utf-8"))
self.assertNotEqual(proc.returncode, 0)
self.assertNotEqual(proc.returncode, 0)
def test_basic_regex(self):
for i, input in enumerate(self.good_inputs_list):
@ -357,9 +374,10 @@ class Test_Regex(unittest.TestCase):
call = self.base_call_outs.format(self.input_file, self.test_output_dir)
call = call + " " + input
print(call)
with subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) as proc:
proc.wait()
assert (proc.returncode == 0)
proc = subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
proc.wait()
copyfile(self.call_output, test_file)
test = pd.read_table(test_file)
@ -381,8 +399,10 @@ class Test_Regex(unittest.TestCase):
call = call + " " + input
print(call)
proc = subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
proc.wait()
print(call)
with subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) as proc:
proc.wait()
assert (proc.returncode == 0)
copyfile(self.call_output, test_file)

13
wikiq
View File

@ -97,6 +97,7 @@ class WikiqPage():
# 3 A B True
# 4 A A False
# Post-loop A Always
collapsed_revs = 0
for i, rev in enumerate(self.mwpage):
# never yield the first time
if i == 0:
@ -491,7 +492,7 @@ class WikiqParser:
# Construct dump file iterator
dump = WikiqIterator(self.input_file, collapse_user=self.collapse_user)
# extract list of namspaces
# extract list of namespaces
self.namespaces = {ns.name: ns.id for ns in dump.mwiterator.site_info.namespaces}
page_count = 0
@ -703,7 +704,6 @@ class WikiqParser:
line = rev_data.to_tsv_row()
print(line, file=self.output_file)
def open_input_file(input_filename):
if re.match(r'.*\.7z$', input_filename):
cmd = ["7za", "x", "-so", input_filename, "*.xml"]
@ -711,14 +711,13 @@ def open_input_file(input_filename):
cmd = ["zcat", input_filename]
elif re.match(r'.*\.bz2$', input_filename):
cmd = ["bzcat", "-dk", input_filename]
else:
raise ValueError("Unrecognized file type: %s" % input_filename)
try:
input_file = Popen(cmd, stdout=PIPE).stdout
return Popen(cmd, stdout=PIPE).stdout
except NameError:
input_file = open(input_filename, 'r')
return input_file
return open(input_filename, 'r')
def get_output_filename(input_filename, parquet=False):
output_filename = re.sub(r'\.(7z|gz|bz2)?$', '', input_filename)