Got wikidiff2 persistence working, except for paragraph moves.
This commit is contained in:
9
wikiq
9
wikiq
@@ -140,7 +140,6 @@ The pattern can include capture groups. If it does then each capture group will
|
||||
If the pattern does not include a capture group, then only one output column will result.
|
||||
"""
|
||||
|
||||
|
||||
class RegexPair(object):
|
||||
def __init__(self, pattern, label):
|
||||
self.pattern = re.compile(pattern)
|
||||
@@ -219,7 +218,7 @@ class WikiqParser:
|
||||
revert_radius: int = 15,
|
||||
output_parquet: bool = True,
|
||||
parquet_buffer_size: int = 2000,
|
||||
wikidiff_url: str = "",
|
||||
wikidiff_url: str = "http://127.0.0.1:8000",
|
||||
):
|
||||
|
||||
"""
|
||||
@@ -450,9 +449,9 @@ class WikiqParser:
|
||||
state = mwpersistence.DiffState(SegmentMatcher(tokenizer=wikitext_split),
|
||||
revert_radius=PERSISTENCE_RADIUS)
|
||||
elif self.persist == PersistMethod.wikidiff:
|
||||
state = mwpersistence.DiffState(WikiDiffMatcher(self.wikidiff_url,
|
||||
revision_texts,
|
||||
tokenizer=wikitext_split),
|
||||
state = mwpersistence.DiffState(WikiDiffMatcher(revision_texts,
|
||||
tokenizer=wikitext_split,
|
||||
self.wikidiff_url),
|
||||
revert_radius=PERSISTENCE_RADIUS)
|
||||
else:
|
||||
from mw.lib import persistence
|
||||
|
||||
Reference in New Issue
Block a user