make wikiq memory efficient again via batch processing.
This commit is contained in:
@@ -89,9 +89,9 @@ class WikiqTester:
|
||||
:return: The output of the wikiq call.
|
||||
"""
|
||||
if out:
|
||||
call = " ".join([WIKIQ, self.input_file, "-o", self.output, *args])
|
||||
call = " ".join([WIKIQ, self.input_file, "-o", self.output, "--batch-size", "10", *args])
|
||||
else:
|
||||
call = " ".join([WIKIQ, self.input_file, *args])
|
||||
call = " ".join([WIKIQ, self.input_file, "--batch-size", "10", *args])
|
||||
|
||||
print(call)
|
||||
return subprocess.check_output(call, stderr=subprocess.PIPE, shell=True)
|
||||
@@ -276,6 +276,20 @@ def test_diff():
|
||||
test = test.reindex(columns=sorted(test.columns))
|
||||
assert_frame_equal(test, baseline, check_like=True)
|
||||
|
||||
def test_diff_plus_pwr():
    """Check wikiq output when ``--diff`` and wikidiff2 persistence are combined.

    Invokes wikiq on the SAILORMOON input with both options enabled, then
    compares the parquet file it writes against the stored parquet baseline.
    If the wikiq subprocess exits with an error, the test fails and reports
    the captured stderr.
    """
    tester = WikiqTester(
        SAILORMOON,
        "diff_pwr",
        in_compression="7z",
        out_format='parquet',
        baseline_format='parquet',
    )

    try:
        tester.call_wikiq("--diff --persistence wikidiff2", "--fandom-2020")
    except subprocess.CalledProcessError as exc:
        # Surface the subprocess's stderr as the failure message.
        failure_text = exc.stderr.decode("utf8")
        pytest.fail(failure_text)

    produced_path = tester.output + f"/{SAILORMOON}.parquet"
    produced = pd.read_parquet(produced_path)
    expected = pd.read_parquet(tester.baseline_file)

    # Put the produced columns in sorted order before comparing.
    ordered_columns = sorted(produced.columns)
    produced = produced.reindex(columns=ordered_columns)
    assert_frame_equal(produced, expected, check_like=True)
|
||||
|
||||
def test_text():
|
||||
tester = WikiqTester(SAILORMOON, "text", in_compression="7z", out_format='parquet', baseline_format='parquet')
|
||||
|
||||
|
||||
Reference in New Issue
Block a user