Remove resource leaks from tests
Close subprocesses within tests to fix the resource leak warning.

Signed-off-by: Will Beason <willbeason@gmail.com>
This commit is contained in:
parent
09a84e7d11
commit
6d133575c7
@ -44,8 +44,10 @@ class Test_Wikipedia(unittest.TestCase):
|
|||||||
|
|
||||||
call = self.base_call.format(self.input_file, self.test_output_dir)
|
call = self.base_call.format(self.input_file, self.test_output_dir)
|
||||||
call = call + " --url-encode"
|
call = call + " --url-encode"
|
||||||
proc = subprocess.Popen(call, stdout=subprocess.PIPE, shell=True)
|
print(call)
|
||||||
proc.wait()
|
with subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) as proc:
|
||||||
|
proc.wait()
|
||||||
|
assert (proc.returncode == 0)
|
||||||
|
|
||||||
copyfile(self.call_output, test_file)
|
copyfile(self.call_output, test_file)
|
||||||
baseline_file = os.path.join(".", self.baseline_output_dir, test_filename)
|
baseline_file = os.path.join(".", self.baseline_output_dir, test_filename)
|
||||||
@ -65,8 +67,10 @@ class Test_Wikipedia(unittest.TestCase):
|
|||||||
call = self.base_call.format(self.input_file, self.test_output_dir)
|
call = self.base_call.format(self.input_file, self.test_output_dir)
|
||||||
call = call + " -n 0 -n 1"
|
call = call + " -n 0 -n 1"
|
||||||
print(call)
|
print(call)
|
||||||
proc = subprocess.Popen(call, stdout=subprocess.PIPE, shell=True)
|
with subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) as proc:
|
||||||
proc.wait()
|
proc.wait()
|
||||||
|
assert (proc.returncode == 0)
|
||||||
|
|
||||||
copyfile(self.call_output, test_file)
|
copyfile(self.call_output, test_file)
|
||||||
baseline_file = os.path.join(os.path.abspath("."), self.baseline_output_dir, test_filename)
|
baseline_file = os.path.join(os.path.abspath("."), self.baseline_output_dir, test_filename)
|
||||||
|
|
||||||
@ -87,8 +91,10 @@ class Test_Wikipedia(unittest.TestCase):
|
|||||||
call = self.base_call.format(self.input_file, self.test_output_dir)
|
call = self.base_call.format(self.input_file, self.test_output_dir)
|
||||||
call = call + " -n 0 -n 1 -rr 1"
|
call = call + " -n 0 -n 1 -rr 1"
|
||||||
print(call)
|
print(call)
|
||||||
proc = subprocess.Popen(call, stdout=subprocess.PIPE, shell=True)
|
with subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) as proc:
|
||||||
proc.wait()
|
proc.wait()
|
||||||
|
assert (proc.returncode == 0)
|
||||||
|
|
||||||
copyfile(self.call_output, test_file)
|
copyfile(self.call_output, test_file)
|
||||||
baseline_file = os.path.join(os.path.abspath("."), self.baseline_output_dir, test_filename)
|
baseline_file = os.path.join(os.path.abspath("."), self.baseline_output_dir, test_filename)
|
||||||
|
|
||||||
@ -125,8 +131,10 @@ class Test_Basic(unittest.TestCase):
|
|||||||
os.remove(test_file)
|
os.remove(test_file)
|
||||||
|
|
||||||
call = self.base_call.format(self.input_file, self.test_output_dir)
|
call = self.base_call.format(self.input_file, self.test_output_dir)
|
||||||
proc = subprocess.Popen(call, stdout=subprocess.PIPE, shell=True)
|
print(call)
|
||||||
proc.wait()
|
with subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) as proc:
|
||||||
|
proc.wait()
|
||||||
|
assert (proc.returncode == 0)
|
||||||
|
|
||||||
copyfile(self.call_output, test_file)
|
copyfile(self.call_output, test_file)
|
||||||
|
|
||||||
@ -144,9 +152,10 @@ class Test_Basic(unittest.TestCase):
|
|||||||
|
|
||||||
call = self.base_call.format(self.input_file, self.test_output_dir)
|
call = self.base_call.format(self.input_file, self.test_output_dir)
|
||||||
call = call + " --collapse-user"
|
call = call + " --collapse-user"
|
||||||
|
print(call)
|
||||||
proc = subprocess.Popen(call, stdout=subprocess.PIPE, shell=True)
|
with subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) as proc:
|
||||||
proc.wait()
|
proc.wait()
|
||||||
|
assert (proc.returncode == 0)
|
||||||
|
|
||||||
copyfile(self.call_output, test_file)
|
copyfile(self.call_output, test_file)
|
||||||
|
|
||||||
@ -163,8 +172,10 @@ class Test_Basic(unittest.TestCase):
|
|||||||
|
|
||||||
call = self.base_call.format(self.input_file, self.test_output_dir)
|
call = self.base_call.format(self.input_file, self.test_output_dir)
|
||||||
call = call + " --persistence segment"
|
call = call + " --persistence segment"
|
||||||
proc = subprocess.Popen(call, stdout=subprocess.PIPE, shell=True)
|
print(call)
|
||||||
proc.wait()
|
with subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) as proc:
|
||||||
|
proc.wait()
|
||||||
|
assert (proc.returncode == 0)
|
||||||
|
|
||||||
copyfile(self.call_output, test_file)
|
copyfile(self.call_output, test_file)
|
||||||
|
|
||||||
@ -182,8 +193,10 @@ class Test_Basic(unittest.TestCase):
|
|||||||
|
|
||||||
call = self.base_call.format(self.input_file, self.test_output_dir)
|
call = self.base_call.format(self.input_file, self.test_output_dir)
|
||||||
call = call + " --persistence legacy"
|
call = call + " --persistence legacy"
|
||||||
proc = subprocess.Popen(call, stdout=subprocess.PIPE, shell=True)
|
print(call)
|
||||||
proc.wait()
|
with subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) as proc:
|
||||||
|
proc.wait()
|
||||||
|
assert (proc.returncode == 0)
|
||||||
|
|
||||||
copyfile(self.call_output, test_file)
|
copyfile(self.call_output, test_file)
|
||||||
|
|
||||||
@ -201,8 +214,10 @@ class Test_Basic(unittest.TestCase):
|
|||||||
|
|
||||||
call = self.base_call.format(self.input_file, self.test_output_dir)
|
call = self.base_call.format(self.input_file, self.test_output_dir)
|
||||||
call = call + " --persistence"
|
call = call + " --persistence"
|
||||||
proc = subprocess.Popen(call, stdout=subprocess.PIPE, shell=True)
|
print(call)
|
||||||
proc.wait()
|
with subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) as proc:
|
||||||
|
proc.wait()
|
||||||
|
assert (proc.returncode == 0)
|
||||||
|
|
||||||
copyfile(self.call_output, test_file)
|
copyfile(self.call_output, test_file)
|
||||||
|
|
||||||
@ -223,8 +238,9 @@ class Test_Basic(unittest.TestCase):
|
|||||||
|
|
||||||
call = self.base_call.format(self.input_file, self.test_output_dir)
|
call = self.base_call.format(self.input_file, self.test_output_dir)
|
||||||
call = call + " --url-encode"
|
call = call + " --url-encode"
|
||||||
proc = subprocess.Popen(call, stdout=subprocess.PIPE, shell=True)
|
with subprocess.Popen(call, stdout=subprocess.PIPE, shell=True) as proc:
|
||||||
proc.wait()
|
proc.wait()
|
||||||
|
assert (proc.returncode == 0)
|
||||||
|
|
||||||
copyfile(self.call_output, test_file)
|
copyfile(self.call_output, test_file)
|
||||||
baseline_file = os.path.join(".", self.baseline_output_dir, test_filename)
|
baseline_file = os.path.join(".", self.baseline_output_dir, test_filename)
|
||||||
@ -252,11 +268,13 @@ class Test_Malformed(unittest.TestCase):
|
|||||||
|
|
||||||
def test_malformed_noargs(self):
|
def test_malformed_noargs(self):
|
||||||
call = self.base_call.format(self.input_file, self.test_output_dir)
|
call = self.base_call.format(self.input_file, self.test_output_dir)
|
||||||
proc = subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
|
print(call)
|
||||||
proc.wait()
|
with subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) as proc:
|
||||||
outs, errs = proc.communicate()
|
proc.wait()
|
||||||
errlines = str(errs).split("\\n")
|
self.assertNotEqual(proc.returncode, 0)
|
||||||
self.assertEqual(errlines[-2], 'xml.etree.ElementTree.ParseError: no element found: line 1369, column 0')
|
outs, errs = proc.communicate()
|
||||||
|
errlines = str(errs).split("\\n")
|
||||||
|
self.assertEqual(errlines[-2], 'xml.etree.ElementTree.ParseError: no element found: line 1369, column 0')
|
||||||
|
|
||||||
|
|
||||||
class Test_Stdout(unittest.TestCase):
|
class Test_Stdout(unittest.TestCase):
|
||||||
@ -337,13 +355,12 @@ class Test_Regex(unittest.TestCase):
|
|||||||
call = self.base_call.format(self.input_file)
|
call = self.base_call.format(self.input_file)
|
||||||
call = call + " --stdout " + input
|
call = call + " --stdout " + input
|
||||||
print(call)
|
print(call)
|
||||||
proc = subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
|
with subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) as proc:
|
||||||
stdout, stderr = proc.communicate()
|
stdout, stderr = proc.communicate()
|
||||||
# print(proc.returncode)
|
# we want to check that the bad inputs were caught and sys.exit is stopping the code
|
||||||
|
print(stderr.decode("utf-8"))
|
||||||
|
|
||||||
# we want to check that the bad inputs were caught and sys.exit is stopping the code
|
self.assertNotEqual(proc.returncode, 0)
|
||||||
print(stderr.decode("utf-8"))
|
|
||||||
self.assertNotEqual(proc.returncode, 0)
|
|
||||||
|
|
||||||
def test_basic_regex(self):
|
def test_basic_regex(self):
|
||||||
for i, input in enumerate(self.good_inputs_list):
|
for i, input in enumerate(self.good_inputs_list):
|
||||||
@ -357,9 +374,10 @@ class Test_Regex(unittest.TestCase):
|
|||||||
call = self.base_call_outs.format(self.input_file, self.test_output_dir)
|
call = self.base_call_outs.format(self.input_file, self.test_output_dir)
|
||||||
call = call + " " + input
|
call = call + " " + input
|
||||||
print(call)
|
print(call)
|
||||||
|
with subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) as proc:
|
||||||
|
proc.wait()
|
||||||
|
assert (proc.returncode == 0)
|
||||||
|
|
||||||
proc = subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
|
|
||||||
proc.wait()
|
|
||||||
copyfile(self.call_output, test_file)
|
copyfile(self.call_output, test_file)
|
||||||
|
|
||||||
test = pd.read_table(test_file)
|
test = pd.read_table(test_file)
|
||||||
@ -381,8 +399,10 @@ class Test_Regex(unittest.TestCase):
|
|||||||
call = call + " " + input
|
call = call + " " + input
|
||||||
print(call)
|
print(call)
|
||||||
|
|
||||||
proc = subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
|
print(call)
|
||||||
proc.wait()
|
with subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) as proc:
|
||||||
|
proc.wait()
|
||||||
|
assert (proc.returncode == 0)
|
||||||
|
|
||||||
copyfile(self.call_output, test_file)
|
copyfile(self.call_output, test_file)
|
||||||
|
|
||||||
|
13
wikiq
13
wikiq
@ -97,6 +97,7 @@ class WikiqPage():
|
|||||||
# 3 A B True
|
# 3 A B True
|
||||||
# 4 A A False
|
# 4 A A False
|
||||||
# Post-loop A Always
|
# Post-loop A Always
|
||||||
|
collapsed_revs = 0
|
||||||
for i, rev in enumerate(self.mwpage):
|
for i, rev in enumerate(self.mwpage):
|
||||||
# never yield the first time
|
# never yield the first time
|
||||||
if i == 0:
|
if i == 0:
|
||||||
@ -491,7 +492,7 @@ class WikiqParser:
|
|||||||
# Construct dump file iterator
|
# Construct dump file iterator
|
||||||
dump = WikiqIterator(self.input_file, collapse_user=self.collapse_user)
|
dump = WikiqIterator(self.input_file, collapse_user=self.collapse_user)
|
||||||
|
|
||||||
# extract list of namspaces
|
# extract list of namespaces
|
||||||
self.namespaces = {ns.name: ns.id for ns in dump.mwiterator.site_info.namespaces}
|
self.namespaces = {ns.name: ns.id for ns in dump.mwiterator.site_info.namespaces}
|
||||||
|
|
||||||
page_count = 0
|
page_count = 0
|
||||||
@ -703,7 +704,6 @@ class WikiqParser:
|
|||||||
line = rev_data.to_tsv_row()
|
line = rev_data.to_tsv_row()
|
||||||
print(line, file=self.output_file)
|
print(line, file=self.output_file)
|
||||||
|
|
||||||
|
|
||||||
def open_input_file(input_filename):
|
def open_input_file(input_filename):
|
||||||
if re.match(r'.*\.7z$', input_filename):
|
if re.match(r'.*\.7z$', input_filename):
|
||||||
cmd = ["7za", "x", "-so", input_filename, "*.xml"]
|
cmd = ["7za", "x", "-so", input_filename, "*.xml"]
|
||||||
@ -711,14 +711,13 @@ def open_input_file(input_filename):
|
|||||||
cmd = ["zcat", input_filename]
|
cmd = ["zcat", input_filename]
|
||||||
elif re.match(r'.*\.bz2$', input_filename):
|
elif re.match(r'.*\.bz2$', input_filename):
|
||||||
cmd = ["bzcat", "-dk", input_filename]
|
cmd = ["bzcat", "-dk", input_filename]
|
||||||
|
else:
|
||||||
|
raise ValueError("Unrecognized file type: %s" % input_filename)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
input_file = Popen(cmd, stdout=PIPE).stdout
|
return Popen(cmd, stdout=PIPE).stdout
|
||||||
except NameError:
|
except NameError:
|
||||||
input_file = open(input_filename, 'r')
|
return open(input_filename, 'r')
|
||||||
|
|
||||||
return input_file
|
|
||||||
|
|
||||||
|
|
||||||
def get_output_filename(input_filename, parquet=False):
|
def get_output_filename(input_filename, parquet=False):
|
||||||
output_filename = re.sub(r'\.(7z|gz|bz2)?$', '', input_filename)
|
output_filename = re.sub(r'\.(7z|gz|bz2)?$', '', input_filename)
|
||||||
|
Loading…
Reference in New Issue
Block a user