make wikiq memory efficient again via batch processing.
This commit is contained in:
@@ -89,9 +89,9 @@ class WikiqTester:
|
||||
:return: The output of the wikiq call.
|
||||
"""
|
||||
if out:
|
||||
call = " ".join([WIKIQ, self.input_file, "-o", self.output, *args])
|
||||
call = " ".join([WIKIQ, self.input_file, "-o", self.output, "--batch-size", "10", *args])
|
||||
else:
|
||||
call = " ".join([WIKIQ, self.input_file, *args])
|
||||
call = " ".join([WIKIQ, self.input_file, "--batch-size", "10", *args])
|
||||
|
||||
print(call)
|
||||
return subprocess.check_output(call, stderr=subprocess.PIPE, shell=True)
|
||||
@@ -276,6 +276,20 @@ def test_diff():
|
||||
test = test.reindex(columns=sorted(test.columns))
|
||||
assert_frame_equal(test, baseline, check_like=True)
|
||||
|
||||
def test_diff_plus_pwr():
    """Check wikiq output when diffs and wikidiff2 persistence are both enabled.

    Runs wikiq on the SAILORMOON dump with ``--diff --persistence wikidiff2``
    and ``--fandom-2020``, then compares the parquet output against the
    stored parquet baseline.
    """
    tester = WikiqTester(
        SAILORMOON,
        "diff_pwr",
        in_compression="7z",
        out_format='parquet',
        baseline_format='parquet',
    )

    try:
        tester.call_wikiq("--diff --persistence wikidiff2", "--fandom-2020")
    except subprocess.CalledProcessError as err:
        # Report the captured stderr of the failed wikiq call as the failure message.
        pytest.fail(err.stderr.decode("utf8"))

    output_path = tester.output + f"/{SAILORMOON}.parquet"
    actual = pd.read_parquet(output_path)
    expected = pd.read_parquet(tester.baseline_file)

    # Put the produced columns in sorted order before comparing; check_like=True
    # additionally makes the comparison ignore index/column ordering.
    ordered_columns = sorted(actual.columns)
    actual = actual.reindex(columns=ordered_columns)
    assert_frame_equal(actual, expected, check_like=True)
|
||||
|
||||
def test_text():
|
||||
tester = WikiqTester(SAILORMOON, "text", in_compression="7z", out_format='parquet', baseline_format='parquet')
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -59,7 +59,7 @@ def assert_correct_equal_section(ops, expected_equal_lines, expected_equal_token
|
||||
def test_equality():
|
||||
rev1 = open("test/test_diff_revisions/1285792388").read()
|
||||
# whitespace is added because exact identity reverts do not result in diffs.
|
||||
matcher = WikiDiffMatcher([rev1,rev1 + " "])
|
||||
matcher = WikiDiffMatcher()
|
||||
diff_processor = matcher.processor()
|
||||
ops, a, b = diff_processor.process(rev1)
|
||||
ops, a, b = diff_processor.process(rev1 + " ")
|
||||
@@ -75,7 +75,7 @@ def test_equality():
|
||||
def test_highlight_range_3():
|
||||
rev1 = open("test/test_diff_revisions/test_highlight_3_from").read()
|
||||
rev2 = open("test/test_diff_revisions/test_highlight_3_to").read()
|
||||
matcher = WikiDiffMatcher([rev1,rev2])
|
||||
matcher = WikiDiffMatcher()
|
||||
diff_processor = matcher.processor()
|
||||
diff_processor.process(rev1)
|
||||
ops, a, b = diff_processor.process(rev2)
|
||||
@@ -85,7 +85,7 @@ def test_highlight_range_3():
|
||||
def test_highlight_range_4():
|
||||
rev1 = open("test/test_diff_revisions/test_highlight_4_from").read()
|
||||
rev2 = open("test/test_diff_revisions/test_highlight_4_to").read()
|
||||
matcher = WikiDiffMatcher([rev1,rev2])
|
||||
matcher = WikiDiffMatcher()
|
||||
diff_processor = matcher.processor()
|
||||
diff_processor.process(rev1)
|
||||
ops, a, b = diff_processor.process(rev2)
|
||||
@@ -95,7 +95,7 @@ def test_highlight_range_4():
|
||||
def test_complex_diff():
|
||||
rev1 = open("test/test_diff_revisions/test_complex_from").read()
|
||||
rev2 = open("test/test_diff_revisions/test_complex_to").read()
|
||||
matcher = WikiDiffMatcher([rev1,rev2])
|
||||
matcher = WikiDiffMatcher()
|
||||
diff_processor = matcher.processor()
|
||||
diff_processor.process(rev1)
|
||||
ops, a, b = diff_processor.process(rev2)
|
||||
@@ -107,7 +107,7 @@ def test_complex_diff():
|
||||
def test_highlight_range_unicode():
|
||||
rev1 = open("test/test_diff_revisions/test_unicode_highlight_from").read()
|
||||
rev2 = open("test/test_diff_revisions/test_unicode_highlight_to").read()
|
||||
matcher = WikiDiffMatcher([rev1,rev2])
|
||||
matcher = WikiDiffMatcher()
|
||||
diff_processor = matcher.processor()
|
||||
diff_processor.process(rev1)
|
||||
ops, a, b = diff_processor.process(rev2)
|
||||
@@ -118,7 +118,7 @@ def test_highlight_range_unicode():
|
||||
def test_highlight_range():
|
||||
rev1 = open("test/test_diff_revisions/1295229484_rangeedit0").read()
|
||||
rev2 = open("test/test_diff_revisions/1295229484_rangeedit1").read()
|
||||
matcher = WikiDiffMatcher([rev1,rev2])
|
||||
matcher = WikiDiffMatcher()
|
||||
diff_processor = matcher.processor()
|
||||
diff_processor.process(rev1)
|
||||
ops, a, b = diff_processor.process(rev2)
|
||||
@@ -128,7 +128,7 @@ def test_highlight_range():
|
||||
def test_unmatched_parmoves():
|
||||
rev1 = open("test/test_diff_revisions/test_unmatched_parmoves_from").read()
|
||||
rev2 = open("test/test_diff_revisions/test_unmatched_parmoves_to").read()
|
||||
matcher = WikiDiffMatcher([rev1,rev2])
|
||||
matcher = WikiDiffMatcher()
|
||||
diff_processor = matcher.processor()
|
||||
diff_processor.process(rev1)
|
||||
ops, a, b = diff_processor.process(rev2)
|
||||
@@ -138,7 +138,7 @@ def test_unmatched_parmoves():
|
||||
def test_bug_4():
|
||||
rev1 = open("test/test_diff_revisions/test_bug_4_from").read()
|
||||
rev2 = open("test/test_diff_revisions/test_bug_4_to").read()
|
||||
matcher = WikiDiffMatcher([rev1,rev2])
|
||||
matcher = WikiDiffMatcher()
|
||||
diff_processor = matcher.processor()
|
||||
diff_processor.process(rev1)
|
||||
ops, a, b = diff_processor.process(rev2)
|
||||
@@ -151,7 +151,7 @@ def test_delete():
|
||||
rev2 = open("test/test_diff_revisions/1295229484_delete").read()
|
||||
|
||||
# NOTE(review): no whitespace is appended in the lines shown here (rev1 and rev2 are passed as read); this comment looks copied from test_equality — confirm.
|
||||
matcher = WikiDiffMatcher([rev1,rev2])
|
||||
matcher = WikiDiffMatcher()
|
||||
diff_processor = matcher.processor()
|
||||
diff_processor.process(rev1)
|
||||
ops, a, b = diff_processor.process(rev2)
|
||||
@@ -207,7 +207,7 @@ def test_delete():
|
||||
def test_addition():
|
||||
rev1 = open("test/test_diff_revisions/1285792388").read()
|
||||
rev2 = open("test/test_diff_revisions/1295229484").read()
|
||||
matcher = WikiDiffMatcher([rev1,rev2])
|
||||
matcher = WikiDiffMatcher()
|
||||
diff_processor = matcher.processor()
|
||||
|
||||
# note that a and b are constructed from the diffs.
|
||||
@@ -255,7 +255,7 @@ def test_addition():
|
||||
def test_paragraph_move():
|
||||
rev1 = open("test/test_diff_revisions/1295229484").read()
|
||||
rev2 = open("test/test_diff_revisions/1295229484_parmove").read()
|
||||
matcher = WikiDiffMatcher([rev1,rev2])
|
||||
matcher = WikiDiffMatcher()
|
||||
diff_processor = matcher.processor()
|
||||
|
||||
# note that a and b are constructed from the diffs.
|
||||
@@ -268,7 +268,7 @@ def test_paragraph_move():
|
||||
def test_paragraph_move_and_change():
|
||||
rev1 = open("test/test_diff_revisions/1295229484").read()
|
||||
rev2 = open("test/test_diff_revisions/1295229484_parmove_and_change").read()
|
||||
matcher = WikiDiffMatcher([rev1,rev2])
|
||||
matcher = WikiDiffMatcher()
|
||||
diff_processor = matcher.processor()
|
||||
|
||||
# note that a and b are constructed from the diffs.
|
||||
@@ -281,7 +281,7 @@ def test_paragraph_move_and_change():
|
||||
def test_infobox():
|
||||
rev1 = open("test/test_diff_revisions/test_infobox_from").read()
|
||||
rev2 = open("test/test_diff_revisions/test_infobox_to").read()
|
||||
matcher = WikiDiffMatcher([rev1,rev2])
|
||||
matcher = WikiDiffMatcher()
|
||||
diff_processor = matcher.processor()
|
||||
|
||||
# note that a and b are constructed from the diffs.
|
||||
@@ -294,7 +294,7 @@ def test_infobox():
|
||||
def test_leading_whitespace():
|
||||
rev1 = open("test/test_diff_revisions/test_leading_ws_from").read()
|
||||
rev2 = open("test/test_diff_revisions/test_leading_ws_to").read()
|
||||
matcher = WikiDiffMatcher([rev1,rev2])
|
||||
matcher = WikiDiffMatcher()
|
||||
diff_processor = matcher.processor()
|
||||
|
||||
# note that a and b are constructed from the diffs.
|
||||
@@ -307,7 +307,7 @@ def test_leading_whitespace():
|
||||
def test_whitespace_bug():
|
||||
rev1 = open("test/test_diff_revisions/test_whitespace_bug_from").read()
|
||||
rev2 = open("test/test_diff_revisions/test_whitespace_bug_to").read()
|
||||
matcher = WikiDiffMatcher([rev1,rev2])
|
||||
matcher = WikiDiffMatcher()
|
||||
diff_processor = matcher.processor()
|
||||
|
||||
# note that a and b are constructed from the diffs.
|
||||
@@ -320,7 +320,7 @@ def test_whitespace_bug():
|
||||
def test_bug_3():
|
||||
rev1 = open("test/test_diff_revisions/test_bug_3_from").read()
|
||||
rev2 = open("test/test_diff_revisions/test_bug_3_to").read()
|
||||
matcher = WikiDiffMatcher([rev1,rev2])
|
||||
matcher = WikiDiffMatcher()
|
||||
diff_processor = matcher.processor()
|
||||
|
||||
# note that a and b are constructed from the diffs.
|
||||
@@ -335,7 +335,7 @@ def test_bug_3():
|
||||
def test_actually_equal():
|
||||
rev1 = open("test/test_diff_revisions/1285792388").read()
|
||||
# unlike test_equality, no whitespace is added here: the identical text is processed twice to exercise the exact-identity case.
|
||||
matcher = WikiDiffMatcher([rev1,rev1])
|
||||
matcher = WikiDiffMatcher()
|
||||
diff_processor = matcher.processor()
|
||||
ops, a, b = diff_processor.process(rev1)
|
||||
ops, a, b = diff_processor.process(rev1)
|
||||
|
||||
Reference in New Issue
Block a user