Refactor revision parsing logic to be columnar #1
1
.gitignore
vendored
1
.gitignore
vendored
@ -13,4 +13,3 @@ uv.lock
|
||||
# Python build and test output
|
||||
__pycache__/
|
||||
/test/test_output/
|
||||
/test/test_output.parquet/
|
||||
|
1
.python-version
Normal file
1
.python-version
Normal file
@ -0,0 +1 @@
|
||||
3.9
|
@ -3,7 +3,7 @@ name = "mediawiki-dump-tools"
|
||||
version = "0.1.0"
|
||||
description = "Add your description here"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.11"
|
||||
requires-python = "~=3.9"
|
||||
dependencies = [
|
||||
"deltas>=0.7.0",
|
||||
"mediawiki-utilities>=0.4.18",
|
||||
@ -12,7 +12,7 @@ dependencies = [
|
||||
"mwtypes>=0.4.0",
|
||||
"mwxml>=0.3.6",
|
||||
"pyarrow>=20.0.0",
|
||||
"yamlconf",
|
||||
"yamlconf>=0.2.6",
|
||||
]
|
||||
|
||||
[tool.uv.sources]
|
||||
|
220
tables.py
Normal file
220
tables.py
Normal file
@ -0,0 +1,220 @@
|
||||
import sys
|
||||
from abc import abstractmethod, ABC
|
||||
from datetime import datetime, timezone
|
||||
from hashlib import sha1
|
||||
from typing import Generic, TypeVar, Union
|
||||
|
||||
import mwreverts
|
||||
import mwtypes
|
||||
import mwxml
|
||||
|
||||
import pyarrow as pa
|
||||
|
||||
T = TypeVar('T')


class RevisionField(ABC, Generic[T]):
    """
    Abstract type which represents a field in a table of page revisions.

    Every call to ``add`` extracts one value and buffers it in ``self.data``;
    ``pop`` hands the buffered values back and starts a fresh buffer.
    """

    def __init__(self):
        # Values extracted so far, in the order add() was called.
        self.data: list[T] = []

    @property
    @abstractmethod
    def field(self) -> pa.Field:
        """The pyarrow field describing this column."""
        pass

    @abstractmethod
    def extract(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> T:
        """
        :param page: The page for this set of revisions.
        :param revisions: The set of revisions to compute the field from.
        Revisions are passed in chronological order, so use revisions[-1] to
        access the most recent revision in the set.

        Implementations of extract should handle the case where revisions is
        either a single revision (collapse-user=FALSE), or a full edit session
        of contiguous edits by the same user (collapse-user=TRUE).
        """
        pass

    def add(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> None:
        """Extract the value for ``revisions`` and append it to the buffer."""
        self.data.append(self.extract(page, revisions))

    def pop(self) -> list[T]:
        """Return every buffered value and reset the buffer to empty."""
        data = self.data
        self.data = []
        return data
|
||||
|
||||
|
||||
class RevisionTable:
    """
    An ordered group of RevisionField columns that are filled and drained
    together.
    """

    columns: list[RevisionField]

    def __init__(self, columns: list[RevisionField]):
        self.columns = columns

    def add(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> None:
        """Ask every column, in order, to extract and buffer its value."""
        for column in self.columns:
            column.add(page=page, revisions=revisions)

    def schema(self) -> pa.Schema:
        """Build a pyarrow schema from the columns' fields, in column order."""
        return pa.schema([c.field for c in self.columns])

    def pop(self) -> dict:
        """
        Drain the buffer of every column.

        :return: A dict mapping each column's field name to the list of
        values that column had buffered.
        """
        return {column.field.name: column.pop() for column in self.columns}
|
||||
|
||||
|
||||
class RevisionId(RevisionField[int]):
    """Column "revid": the id of the last revision in each set."""

    field = pa.field("revid", pa.int64())

    def extract(self, _: mwtypes.Page, revisions: list[mwxml.Revision]) -> int:
        # The page argument is unused; only the last revision is consulted.
        return revisions[-1].id
|
||||
|
||||
|
||||
class RevisionTimestamp(RevisionField[datetime]):
    """Column "date_time": the timestamp of the last revision in each set."""

    field = pa.field("date_time", pa.timestamp('s'))

    def extract(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> datetime:
        # NOTE(review): annotated as datetime; confirm that the timestamp
        # object supplied by mwxml is accepted for a pa.timestamp('s') column.
        latest = revisions[-1]
        return latest.timestamp
|
||||
|
||||
|
||||
class RevisionArticleId(RevisionField[int]):
    """Column "articleid": the id of the page the revisions belong to."""

    field = pa.field("articleid", pa.int64())

    def extract(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> int:
        # Page-level value; the revisions themselves are not inspected.
        article_id = page.id
        return article_id
|
||||
|
||||
|
||||
class RevisionEditorId(RevisionField[Union[int, None]]):
    """
    Column "editorid": the user id on the last revision in each set, or None
    when that revision's user is flagged as deleted.
    """

    field = pa.field("editorid", pa.int64(), nullable=True)

    def extract(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> Union[int, None]:
        latest = revisions[-1]
        # Only touch latest.user when the user has not been deleted.
        return None if latest.deleted.user else latest.user.id
|
||||
|
||||
|
||||
class RevisionEditSummary(RevisionField[Union[str, None]]):
    """Column "edit_summary": the comment of the last revision in each set."""

    field = pa.field("edit_summary", pa.string(), nullable=True)

    def extract(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> Union[str, None]:
        # The comment is passed through unchanged, whatever value it holds.
        return revisions[-1].comment
|
||||
|
||||
class RevisionIsAnon(RevisionField[Union[bool, None]]):
    """
    Column "anon": True when the user on the last revision has no id, False
    when it has one, and None when that revision's user is flagged as deleted.
    """

    field = pa.field("anon", pa.bool_(), nullable=True)

    def extract(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> Union[bool, None]:
        latest = revisions[-1]
        # Only touch latest.user when the user has not been deleted.
        return None if latest.deleted.user else latest.user.id is None
|
||||
|
||||
|
||||
class RevisionEditorText(RevisionField[Union[str, None]]):
    """
    Column "editor": the user text on the last revision in each set, or None
    when that revision's user is flagged as deleted.
    """

    field = pa.field("editor", pa.string(), nullable=True)

    def extract(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> Union[str, None]:
        latest = revisions[-1]
        # Only touch latest.user when the user has not been deleted.
        return None if latest.deleted.user else latest.user.text
|
||||
|
||||
|
||||
class RevisionPageTitle(RevisionField[str]):
    """Column "title": the title of the page the revisions belong to."""

    field = pa.field("title", pa.string())

    def extract(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> str:
        # Page-level value; the revisions themselves are not inspected.
        title = page.title
        return title
|
||||
|
||||
|
||||
class RevisionDeleted(RevisionField[bool]):
    """Column "deleted": the deleted-text flag of the last revision in each set."""

    field = pa.field("deleted", pa.bool_())

    def extract(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> bool:
        # Reports the text deletion flag specifically, not the user one.
        return revisions[-1].deleted.text
|
||||
|
||||
|
||||
class RevisionNamespace(RevisionField[int]):
    """Column "namespace": the namespace of the page the revisions belong to."""

    field = pa.field("namespace", pa.int32())

    def extract(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> int:
        # Page-level value; the revisions themselves are not inspected.
        namespace = page.namespace
        return namespace
|
||||
|
||||
|
||||
class RevisionSha1(RevisionField[str]):
    """Column "sha1": the sha1 attribute of the last revision in each set."""

    field = pa.field("sha1", pa.string())

    def extract(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> str:
        # The stored attribute is returned as-is; nothing is hashed here.
        return revisions[-1].sha1
|
||||
|
||||
|
||||
class RevisionTextChars(RevisionField[Union[int, None]]):
    """
    Column "text_chars": the length of the text of the last revision in each
    set, or None when that revision's text is flagged as deleted.
    """

    field = pa.field("text_chars", pa.int32(), nullable=True)

    def extract(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> Union[int, None]:
        latest = revisions[-1]
        # Guard first: the text is only measured when it has not been deleted.
        if latest.deleted.text:
            return None

        return len(latest.text)
|
||||
|
||||
|
||||
class RevisionText(RevisionField[str]):
    """Column "text": the text of the last revision in each set."""

    field = pa.field("text", pa.string())

    def extract(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> str:
        # Passed through unchanged; no deleted-text check is made here.
        return revisions[-1].text
|
||||
|
||||
|
||||
class RevisionIsMinor(RevisionField[bool]):
    """Column "minor": the minor flag of the last revision in each set."""

    field = pa.field("minor", pa.bool_())

    def extract(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> bool:
        # Earlier revisions in the set do not influence the result.
        return revisions[-1].minor
|
||||
|
||||
|
||||
class RevisionReverts(RevisionField[Union[str, None]]):
    """
    Column "reverteds": the comma-joined entries of ``revert.reverteds`` for
    the last revision in each set, or None when no detector is set, the
    revision text is flagged as deleted, or the detector reports no revert.
    """

    field = pa.field("reverteds", pa.string(), nullable=True)

    def __init__(self):
        super().__init__()
        # No detector by default; extract() returns None until one is
        # assigned to this attribute.
        self.rev_detector: Union[mwreverts.Detector, None] = None

    def extract(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> Union[str, None]:
        detector = self.rev_detector
        if detector is None:
            return None

        latest = revisions[-1]
        # Revisions with deleted text are never passed to the detector.
        if latest.deleted.text:
            return None

        revert = detector.process(latest.sha1, latest.id)
        if revert is None:
            return None

        return ",".join(str(reverted) for reverted in revert.reverteds)
|
||||
|
||||
|
||||
class RevisionCollapsed(RevisionField[int]):
    """Column "collapsed_revs": how many revisions are in each set."""

    field = pa.field("collapsed_revs", pa.int64())

    def extract(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> int:
        # The size of the set is the value; no revision is inspected.
        count = len(revisions)
        return count
|
@ -1,12 +1,17 @@
|
||||
import shutil
|
||||
import sys
|
||||
import unittest
|
||||
import os
|
||||
import subprocess
|
||||
from shutil import copyfile
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from pandas.testing import assert_frame_equal
|
||||
from pandas import DataFrame
|
||||
from pandas.testing import assert_frame_equal, assert_series_equal
|
||||
from io import StringIO
|
||||
import tracemalloc
|
||||
from typing import Final
|
||||
from typing import Final, Union
|
||||
|
||||
# Make references to files and wikiq relative to this file, not to the current working directory.
|
||||
TEST_DIR: Final[str] = os.path.dirname(os.path.realpath(__file__))
|
||||
@ -19,6 +24,7 @@ SAILORMOON: Final[str] = "sailormoon"
|
||||
TWINPEAKS: Final[str] = "twinpeaks"
|
||||
REGEXTEST: Final[str] = "regextest"
|
||||
|
||||
|
||||
def setup():
|
||||
tracemalloc.start()
|
||||
|
||||
@ -35,25 +41,39 @@ setup()
|
||||
class WikiqTester:
|
||||
def __init__(self,
|
||||
wiki: str,
|
||||
case_name: str | None = None,
|
||||
suffix: str | None = None,
|
||||
case_name: str,
|
||||
suffix: Union[str, None] = None,
|
||||
in_compression: str = "bz2",
|
||||
baseline_format: str = "tsv",
|
||||
out_format: str = "tsv",
|
||||
):
|
||||
self.input_file = os.path.join(TEST_DIR, "dumps", "{0}.xml.{1}".format(wiki, in_compression))
|
||||
|
||||
basename = "{0}_{1}".format(case_name, wiki)
|
||||
if suffix:
|
||||
basename = "{0}_{1}".format(basename, suffix)
|
||||
|
||||
self.output = os.path.join(TEST_OUTPUT_DIR, "{0}.{1}".format(basename, out_format))
|
||||
|
||||
if os.path.exists(self.output):
|
||||
if os.path.isfile(self.output):
|
||||
os.remove(self.output)
|
||||
else:
|
||||
shutil.rmtree(self.output)
|
||||
|
||||
if out_format == "parquet":
|
||||
os.makedirs(self.output, exist_ok=True)
|
||||
|
||||
if suffix is None:
|
||||
self.wikiq_baseline_name = "{0}.{1}".format(wiki, baseline_format)
|
||||
self.wikiq_out_name = "{0}.{1}".format(wiki, out_format)
|
||||
else:
|
||||
self.wikiq_baseline_name = "{0}_{1}.{2}".format(wiki, suffix, baseline_format)
|
||||
self.wikiq_out_name = "{0}_{1}.{2}".format(wiki, suffix, out_format)
|
||||
self.call_output = os.path.join(TEST_OUTPUT_DIR, "{0}.{1}".format(wiki, out_format))
|
||||
|
||||
# If case_name is unset, there are no relevant baseline or test files.
|
||||
if case_name is not None:
|
||||
self.baseline_file = os.path.join(BASELINE_DIR, "{0}_{1}".format(case_name, self.wikiq_out_name))
|
||||
self.test_file = os.path.join(TEST_OUTPUT_DIR, "{0}_{1}".format(case_name, self.wikiq_out_name))
|
||||
if os.path.exists(self.test_file):
|
||||
os.remove(self.test_file)
|
||||
self.baseline_file = os.path.join(BASELINE_DIR, "{0}_{1}".format(case_name, self.wikiq_baseline_name))
|
||||
|
||||
def call_wikiq(self, *args: str, out: bool = True):
|
||||
"""
|
||||
@ -63,13 +83,14 @@ class WikiqTester:
|
||||
:return: The output of the wikiq call.
|
||||
"""
|
||||
if out:
|
||||
call = ' '.join([WIKIQ, self.input_file, "-o", TEST_OUTPUT_DIR, *args])
|
||||
call = ' '.join([WIKIQ, self.input_file, "-o", self.output, *args])
|
||||
else:
|
||||
call = ' '.join([WIKIQ, self.input_file, *args])
|
||||
|
||||
print(call)
|
||||
return subprocess.check_output(call, stderr=subprocess.PIPE, shell=True)
|
||||
|
||||
|
||||
# with / without pwr DONE
|
||||
# with / without url encode DONE
|
||||
# with / without collapse user DONE
|
||||
@ -81,18 +102,15 @@ class WikiqTester:
|
||||
# malformed xmls DONE
|
||||
|
||||
class WikiqTestCase(unittest.TestCase):
|
||||
def test_WP_url_encode(self):
|
||||
tester = WikiqTester(IKWIKI, "url-encode")
|
||||
def test_WP_noargs(self):
|
||||
tester = WikiqTester(IKWIKI, "noargs")
|
||||
|
||||
try:
|
||||
tester.call_wikiq("--url-encode")
|
||||
tester.call_wikiq()
|
||||
except subprocess.CalledProcessError as exc:
|
||||
self.fail(exc.stderr.decode("utf8"))
|
||||
|
||||
copyfile(tester.call_output, tester.test_file)
|
||||
|
||||
# as a test let's make sure that we get equal data frames
|
||||
test = pd.read_table(tester.test_file)
|
||||
test = pd.read_table(tester.output)
|
||||
baseline = pd.read_table(tester.baseline_file)
|
||||
assert_frame_equal(test, baseline, check_like=True)
|
||||
|
||||
@ -104,10 +122,8 @@ class WikiqTestCase(unittest.TestCase):
|
||||
except subprocess.CalledProcessError as exc:
|
||||
self.fail(exc.stderr.decode("utf8"))
|
||||
|
||||
copyfile(tester.call_output, tester.test_file)
|
||||
|
||||
# as a test let's make sure that we get equal data frames
|
||||
test = pd.read_table(tester.test_file)
|
||||
test = pd.read_table(tester.output)
|
||||
num_wrong_ns = sum(~ test.namespace.isin({0, 1}))
|
||||
self.assertEqual(num_wrong_ns, 0)
|
||||
baseline = pd.read_table(tester.baseline_file)
|
||||
@ -121,15 +137,40 @@ class WikiqTestCase(unittest.TestCase):
|
||||
except subprocess.CalledProcessError as exc:
|
||||
self.fail(exc.stderr.decode("utf8"))
|
||||
|
||||
copyfile(tester.call_output, tester.test_file)
|
||||
|
||||
# as a test let's make sure that we get equal data frames
|
||||
test = pd.read_table(tester.test_file)
|
||||
test = pd.read_table(tester.output)
|
||||
num_wrong_ns = sum(~ test.namespace.isin({0, 1}))
|
||||
self.assertEqual(num_wrong_ns, 0)
|
||||
baseline = pd.read_table(tester.baseline_file)
|
||||
assert_frame_equal(test, baseline, check_like=True)
|
||||
|
||||
def test_WP_no_revert_radius(self):
    """Run wikiq with ``-rr 0`` and compare its TSV output to the baseline."""
    tester = WikiqTester(IKWIKI, "no_revert_radius")

    try:
        tester.call_wikiq("-rr 0")
    except subprocess.CalledProcessError as exc:
        self.fail(exc.stderr.decode("utf8"))

    # as a test let's make sure that we get equal data frames
    actual = pd.read_table(tester.output)

    # NOTE(review): pandas usually represents missing cells as NaN rather
    # than None, so this count may always be zero -- confirm the intended check.
    none_count = sum(1 for value in actual.revert if value is None)
    self.assertEqual(none_count, 0)

    expected = pd.read_table(tester.baseline_file)
    assert_frame_equal(actual, expected, check_like=True)
|
||||
|
||||
def test_WP_collapse_user(self):
    """Run wikiq with ``--collapse-user`` and compare its TSV output to the baseline."""
    tester = WikiqTester(IKWIKI, "collapse_user")

    try:
        tester.call_wikiq("--collapse-user")
    except subprocess.CalledProcessError as exc:
        # Surface wikiq's stderr as the failure message.
        self.fail(exc.stderr.decode("utf8"))

    actual = pd.read_table(tester.output)
    expected = pd.read_table(tester.baseline_file)
    assert_frame_equal(actual, expected, check_like=True)
|
||||
|
||||
def test_noargs(self):
|
||||
tester = WikiqTester(SAILORMOON, "noargs", in_compression="7z")
|
||||
|
||||
@ -138,9 +179,7 @@ class WikiqTestCase(unittest.TestCase):
|
||||
except subprocess.CalledProcessError as exc:
|
||||
self.fail(exc.stderr.decode("utf8"))
|
||||
|
||||
copyfile(tester.call_output, tester.test_file)
|
||||
|
||||
test = pd.read_table(tester.test_file)
|
||||
test = pd.read_table(tester.output)
|
||||
baseline = pd.read_table(tester.baseline_file)
|
||||
assert_frame_equal(test, baseline, check_like=True)
|
||||
|
||||
@ -148,13 +187,11 @@ class WikiqTestCase(unittest.TestCase):
|
||||
tester = WikiqTester(SAILORMOON, "collapse-user", in_compression="7z")
|
||||
|
||||
try:
|
||||
tester.call_wikiq("--collapse-user --fandom-2020")
|
||||
tester.call_wikiq("--collapse-user", "--fandom-2020")
|
||||
except subprocess.CalledProcessError as exc:
|
||||
self.fail(exc.stderr.decode("utf8"))
|
||||
|
||||
copyfile(tester.call_output, tester.test_file)
|
||||
|
||||
test = pd.read_table(tester.test_file)
|
||||
test = pd.read_table(tester.output)
|
||||
baseline = pd.read_table(tester.baseline_file)
|
||||
assert_frame_equal(test, baseline, check_like=True)
|
||||
|
||||
@ -162,13 +199,11 @@ class WikiqTestCase(unittest.TestCase):
|
||||
tester = WikiqTester(SAILORMOON, "persistence_segment", in_compression="7z")
|
||||
|
||||
try:
|
||||
tester.call_wikiq("--persistence segment --fandom-2020")
|
||||
tester.call_wikiq("--persistence segment", "--fandom-2020")
|
||||
except subprocess.CalledProcessError as exc:
|
||||
self.fail(exc.stderr.decode("utf8"))
|
||||
|
||||
copyfile(tester.call_output, tester.test_file)
|
||||
|
||||
test = pd.read_table(tester.test_file)
|
||||
test = pd.read_table(tester.output)
|
||||
baseline = pd.read_table(tester.baseline_file)
|
||||
assert_frame_equal(test, baseline, check_like=True)
|
||||
|
||||
@ -176,13 +211,11 @@ class WikiqTestCase(unittest.TestCase):
|
||||
tester = WikiqTester(SAILORMOON, "persistence_legacy", in_compression="7z")
|
||||
|
||||
try:
|
||||
tester.call_wikiq("--persistence legacy --fandom-2020")
|
||||
tester.call_wikiq("--persistence legacy", "--fandom-2020")
|
||||
except subprocess.CalledProcessError as exc:
|
||||
self.fail(exc.stderr.decode("utf8"))
|
||||
|
||||
copyfile(tester.call_output, tester.test_file)
|
||||
|
||||
test = pd.read_table(tester.test_file)
|
||||
test = pd.read_table(tester.output)
|
||||
baseline = pd.read_table(tester.baseline_file)
|
||||
assert_frame_equal(test, baseline, check_like=True)
|
||||
|
||||
@ -190,35 +223,18 @@ class WikiqTestCase(unittest.TestCase):
|
||||
tester = WikiqTester(SAILORMOON, "persistence", in_compression="7z")
|
||||
|
||||
try:
|
||||
tester.call_wikiq("--persistence --fandom-2020")
|
||||
tester.call_wikiq("--persistence", "--fandom-2020")
|
||||
except subprocess.CalledProcessError as exc:
|
||||
self.fail(exc.stderr.decode("utf8"))
|
||||
|
||||
copyfile(tester.call_output, tester.test_file)
|
||||
|
||||
test = pd.read_table(tester.test_file)
|
||||
baseline = pd.read_table(tester.baseline_file)
|
||||
|
||||
test = test.reindex(columns=sorted(test.columns))
|
||||
assert_frame_equal(test, baseline, check_like=True)
|
||||
|
||||
def test_url_encode(self):
|
||||
tester = WikiqTester(SAILORMOON, "url-encode", in_compression="7z")
|
||||
|
||||
try:
|
||||
tester.call_wikiq("--url-encode --fandom-2020")
|
||||
except subprocess.CalledProcessError as exc:
|
||||
self.fail(exc.stderr.decode("utf8"))
|
||||
|
||||
copyfile(tester.call_output, tester.test_file)
|
||||
test = pd.read_table(tester.test_file)
|
||||
test = pd.read_table(tester.output)
|
||||
baseline = pd.read_table(tester.baseline_file)
|
||||
|
||||
test = test.reindex(columns=sorted(test.columns))
|
||||
assert_frame_equal(test, baseline, check_like=True)
|
||||
|
||||
def test_malformed_noargs(self):
|
||||
tester = WikiqTester(wiki=TWINPEAKS, in_compression="7z")
|
||||
tester = WikiqTester(wiki=TWINPEAKS, case_name="noargs", in_compression="7z")
|
||||
want_exception = 'xml.etree.ElementTree.ParseError: no element found: line 1369, column 0'
|
||||
|
||||
try:
|
||||
@ -233,18 +249,16 @@ class WikiqTestCase(unittest.TestCase):
|
||||
tester = WikiqTester(wiki=SAILORMOON, case_name="noargs", in_compression="7z")
|
||||
|
||||
try:
|
||||
outs = tester.call_wikiq( "--stdout --fandom-2020", out=False).decode("utf8")
|
||||
outs = tester.call_wikiq("--stdout", "--fandom-2020", out=False).decode("utf8")
|
||||
except subprocess.CalledProcessError as exc:
|
||||
self.fail(exc.stderr.decode("utf8"))
|
||||
|
||||
copyfile(tester.call_output, tester.test_file)
|
||||
|
||||
test = pd.read_table(StringIO(outs))
|
||||
baseline = pd.read_table(tester.baseline_file)
|
||||
assert_frame_equal(test, baseline, check_like=True)
|
||||
|
||||
def test_bad_regex(self):
|
||||
tester = WikiqTester(wiki=REGEXTEST)
|
||||
tester = WikiqTester(wiki=REGEXTEST, case_name="bad_regex")
|
||||
|
||||
# sample arguments for checking that bad arguments get terminated / test_regex_arguments
|
||||
bad_arguments_list = [
|
||||
@ -281,13 +295,11 @@ class WikiqTestCase(unittest.TestCase):
|
||||
tester = WikiqTester(wiki=REGEXTEST, case_name="basic", suffix=str(i))
|
||||
|
||||
try:
|
||||
tester.call_wikiq( arguments)
|
||||
tester.call_wikiq(arguments)
|
||||
except subprocess.CalledProcessError as exc:
|
||||
self.fail(exc.stderr.decode("utf8"))
|
||||
|
||||
copyfile(tester.call_output, tester.test_file)
|
||||
|
||||
test = pd.read_table(tester.test_file)
|
||||
test = pd.read_table(tester.output)
|
||||
|
||||
baseline = pd.read_table(tester.baseline_file)
|
||||
assert_frame_equal(test, baseline, check_like=True)
|
||||
@ -307,13 +319,48 @@ class WikiqTestCase(unittest.TestCase):
|
||||
except subprocess.CalledProcessError as exc:
|
||||
self.fail(exc.stderr.decode("utf8"))
|
||||
|
||||
copyfile(tester.call_output, tester.test_file)
|
||||
|
||||
test = pd.read_table(tester.test_file)
|
||||
test = pd.read_table(tester.output)
|
||||
|
||||
baseline = pd.read_table(tester.baseline_file)
|
||||
assert_frame_equal(test, baseline, check_like=True)
|
||||
|
||||
def test_parquet(self):
    """Run wikiq with parquet output and compare it, column by column, to the TSV baseline."""
    tester = WikiqTester(IKWIKI, "noargs", out_format="parquet")

    try:
        tester.call_wikiq()
    except subprocess.CalledProcessError as exc:
        self.fail(exc.stderr.decode("utf8"))

    # as a test let's make sure that we get equal data frames
    actual: DataFrame = pd.read_parquet(tester.output)
    expected: DataFrame = pd.read_table(tester.baseline_file)

    # Pandas does not read timestamps as the desired datetime type.
    expected['date_time'] = pd.to_datetime(expected['date_time'])

    # Swap NaN for None in these baseline columns before comparing them with
    # the parquet data. The 'reverteds' values stay as strings; they are not
    # split into lists of ids.
    for name in ('revert', 'reverteds', 'sha1', 'editor', 'anon'):
        expected[name] = expected[name].replace(np.nan, None)

    # Diagnostic output only: list the rows whose 'revert' values differ.
    for index, row in expected.iterrows():
        want = row['revert']
        got = actual['revert'][index]
        if want != got:
            print(row['revid'], ":", want, "!=", got)

    # Compare per column, so a ValueError can be reported with its column name.
    for col in expected.columns:
        try:
            assert_series_equal(actual[col], expected[col], check_like=True, check_dtype=False)
        except ValueError as exc:
            print(f"Error comparing column {col}")
            self.fail(exc)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -1,27 +1,27 @@
|
||||
anon articleid date_time deleted editor editorid minor namespace revert reverteds revid sha1 text_chars threedigits title
|
||||
FALSE 56237363 2018-01-07 10:40:58 FALSE "NinjaRobotPirate" 3742946 FALSE 3 FALSE 819091731 135nz8q6lfam6cojla7azb7k5alx3t3 1141 126, 126, 126, 126 "User talk:86.139.142.254"
|
||||
FALSE 56237364 2018-01-07 10:41:10 FALSE "Kavin kavitha" 32792125 FALSE 3 FALSE 819091755 0pwezjc6yopz0smc8al6ogc4fax5bwo 663 "User talk:Kavin kavitha"
|
||||
FALSE 56237365 2018-01-07 10:41:26 FALSE "Amicable always" 32621254 FALSE 3 FALSE 819091788 sz3t2ap7z8bpkdvdvi195f3i35949bv 399 "User talk:Dr.vivek163"
|
||||
FALSE 56237366 2018-01-07 10:41:31 FALSE "ClueBot NG" 13286072 FALSE 3 FALSE 819091796 r6s5j8j3iykenrhuhpnkpsmmd71vubf 1260 "User talk:Twistorl"
|
||||
FALSE 56237368 2018-01-07 10:41:51 FALSE "Khruner" 8409334 FALSE 0 FALSE 819091825 tf5qz2yaswx61zrlm9ovxzuhl7r2dc4 2249 119, 978, 500, 292, 225, 199, 292 "Kom Firin"
|
||||
FALSE 56237368 2018-01-27 12:16:02 FALSE "Khruner" 8409334 TRUE 0 FALSE 822610647 e6oa4g0qv64icdaq26uu1zzbyr5hcbh 2230 119, 978, 500, 292, 225, 199, 292 "Kom Firin"
|
||||
FALSE 56237369 2018-01-07 10:42:05 FALSE "Editingaccount1994" 32794215 FALSE 2 FALSE 819091844 0fyvyh2a8xu41gt8obr34oba0bfixj6 27840 798, 150, 150, 150, 621, 137, 137, 150, 150, 350, 195, 350, 195, 180, 180, 350, 195, 300, 150, 150, 150, 180, 180, 621 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-07 11:09:52 FALSE "AnomieBOT" 7611264 TRUE 2 FALSE 819093984 8gy52aolt5rg3eaketwj5v7eiw0apv2 27787 798, 150, 150, 150, 621, 137, 137, 150, 150, 350, 195, 350, 195, 180, 180, 350, 195, 300, 150, 150, 150, 180, 180, 621 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-12 21:45:50 FALSE "SporkBot" 12406635 TRUE 2 FALSE 820064189 he8ydemaanxlrpftqxkez8jfpge1fsj 27784 798, 150, 150, 150, 621, 137, 137, 150, 150, 350, 195, 350, 195, 180, 180, 350, 195, 300, 150, 150, 150, 180, 180, 621 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-12 23:28:11 FALSE "SporkBot" 12406635 TRUE 2 FALSE 820078679 0to17w9rth3url8n7gvucdtobybdq5h 27783 798, 150, 150, 150, 621, 137, 137, 150, 150, 350, 195, 350, 195, 180, 180, 350, 195, 300, 150, 150, 150, 180, 180, 621 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-12 23:28:39 FALSE "SporkBot" 12406635 TRUE 2 FALSE 820078733 531dizmmloyxffbkdr5vph7owh921eg 27782 798, 150, 150, 150, 621, 137, 137, 150, 150, 350, 195, 350, 195, 180, 180, 350, 195, 300, 150, 150, 150, 180, 180, 621 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-13 13:45:33 FALSE "Frietjes" 13791031 FALSE 2 FALSE 820177382 nik9p2u2fuk4yazjxt8ymbicxv5qid9 27757 798, 150, 150, 150, 621, 100, 621 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-24 01:35:22 FALSE "CommonsDelinker" 2304267 FALSE 2 FALSE 822038928 gwk6pampl8si1v5pv3kwgteg710sfw3 27667 798, 150, 150, 150, 621, 100, 621 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237370 2018-01-07 10:42:20 FALSE "PamD" 1368779 FALSE 0 FALSE 819091874 n4ozbsgle13p9yywtfrz982ccj8woc9 25 "Anita del Rey"
|
||||
FALSE 56237371 2018-01-07 10:42:27 FALSE "ClueBot NG" 13286072 FALSE 3 FALSE 819091883 ksohnvsbeuzwpl5vb8a3v8m18hva0a7 1274 119, 157, 119, 157, 119, 157, 119, 157 "User talk:119.94.96.157"
|
||||
FALSE 56237372 2018-01-07 10:42:50 FALSE "Underbar dk" 677153 FALSE 14 FALSE 819091914 je7aw21fedbwyqsyofpisdrynsu7olr 113 "Category:Ohmi Railway"
|
||||
FALSE 56237375 2018-01-07 10:43:32 FALSE "TastyPoutine" 882433 FALSE 3 FALSE 819091968 cpm4tkzcx4hc6irr9ukbi06ogud8dtq 199 "User talk:92.226.219.222"
|
||||
FALSE 56237375 2018-01-07 11:10:24 FALSE "AnomieBOT" 7611264 TRUE 3 FALSE 819094036 artmfz8b2gxhb3pp8a5p4ksplxqfkpg 1840 "User talk:92.226.219.222"
|
||||
FALSE 56237375 2018-01-07 14:33:36 FALSE "Only" 702940 FALSE 3 FALSE 819112363 dn9wj0n8d8pdd5lqe56uw5xamupowr1 2949 126, 126, 126, 126 "User talk:92.226.219.222"
|
||||
FALSE 56237376 2018-01-07 10:44:01 FALSE "Dipayanacharya" 32794237 FALSE 2 FALSE 819092004 ofueugwatmmn7u73isw732neuza57gk 28 "User:Dipayanacharya"
|
||||
FALSE 56237376 2018-01-07 10:49:08 FALSE "Dipayanacharya" 32794237 FALSE 2 FALSE 819092390 dsz55xv96ec2uv6w9c1z7c52ipfovbw 38 "User:Dipayanacharya"
|
||||
FALSE 56237378 2018-01-07 10:44:56 FALSE "Vinegarymass911" 21516552 FALSE 0 FALSE 819092066 9ma38hak0ef1ew4fpiutxpnzd8oz1wd 65 "BSCIC"
|
||||
FALSE 56237379 2018-01-07 10:45:21 FALSE "BrownHairedGirl" 754619 FALSE 14 FALSE 819092102 4dvakoat58bzyf5hmtthxukt29hip6n 285 "Category:Women government ministers of Yemen"
|
||||
FALSE 56237381 2018-01-07 10:45:54 FALSE "PRehse" 410898 FALSE 1 FALSE 819092135 2sjrxsc7os9k9pg4su2t4rk2j8nn0h7 103 "Talk:List of Morning Glories Characters"
|
||||
FALSE 56237382 2018-01-07 10:45:56 FALSE "ClueBot NG" 13286072 FALSE 3 FALSE 819092138 3y9t5wpk6ur5jhone75rhm4wjf01fgi 1330 106, 207, 126, 114, 106, 207, 126, 114, 106, 207, 126, 114, 106, 207, 126, 114 "User talk:106.207.126.114"
|
||||
FALSE 56237382 2018-01-07 10:50:22 FALSE "HindWIKI" 31190506 FALSE 3 FALSE 819092495 8wvn6vh3isyt0dorpe89lztrburgupe 2355 106, 207, 126, 114, 106, 207, 126, 114, 106, 207, 126, 114, 106, 207, 126, 114 "User talk:106.207.126.114"
|
||||
"revid" "date_time" "articleid" "title" "namespace" "deleted" "editorid" "edit_summary" "text_chars" "reverteds" "sha1" "minor" "editor" "anon" "revert" "threedigits"
|
||||
819091731 2018-01-07 10:40:58 56237363 "User talk:86.139.142.254" 3 false 3742946 "Your IP address has been blocked from editing because it has been used to [[WP:EVADE|evade a previous block]]. ([[WP:TW|TW]])" 1141 "135nz8q6lfam6cojla7azb7k5alx3t3" false "NinjaRobotPirate" false false "126, 126, 126, 126"
|
||||
819091755 2018-01-07 10:41:10 56237364 "User talk:Kavin kavitha" 3 false 32792125 "[[WP:AES|←]]Created page with ''''''Kavin (Tamil. கவின்) is a masculine given name, which is Tamil for ""beauty"", ""grace"", ""fairness"" or ""comeliness""Kavin is born on 01 /12/2001 at Sa...'" 663 "0pwezjc6yopz0smc8al6ogc4fax5bwo" false "Kavin kavitha" false false
|
||||
819091788 2018-01-07 10:41:26 56237365 "User talk:Dr.vivek163" 3 false 32621254 "/* Regarding Merger discussion */ new section" 399 "sz3t2ap7z8bpkdvdvi195f3i35949bv" false "Amicable always" false false
|
||||
819091796 2018-01-07 10:41:31 56237366 "User talk:Twistorl" 3 false 13286072 "Warning [[Special:Contributions/Twistorl|Twistorl]] - #1" 1260 "r6s5j8j3iykenrhuhpnkpsmmd71vubf" false "ClueBot NG" false false
|
||||
819091825 2018-01-07 10:41:51 56237368 "Kom Firin" 0 false 8409334 "[[WP:AES|←]]Created page with '[[File:Stele 67.119 Brooklyn.jpg|thumb|Stele of the [[Libu#Great Chiefs of the Libu|Chief of the Libu]] Titaru, a contemporary of pharaoh [[Shoshenq V]] of the [...'TestCaseB and you're a Tor node " 2249 "tf5qz2yaswx61zrlm9ovxzuhl7r2dc4" false "Khruner" false false "119, 978, 500, 292, 225, 199, 292"
|
||||
822610647 2018-01-27 12:16:02 56237368 "Kom Firin" 0 false 8409334 "/* History */ typo" 2230 "e6oa4g0qv64icdaq26uu1zzbyr5hcbh" true "Khruner" false false "119, 978, 500, 292, 225, 199, 292"
|
||||
819091844 2018-01-07 10:42:05 56237369 "User:Editingaccount1994/sandbox" 2 false 32794215 "[[WP:AES|←]]Created page with '{{User sandbox}} <!-- EDIT BELOW THIS LINE --> {{voir homonymes|Chevalier}} {{Infobox Artiste | nom = Li Chevalier | autres noms = | im...'" 27840 "0fyvyh2a8xu41gt8obr34oba0bfixj6" false "Editingaccount1994" false false "798, 150, 150, 150, 621, 137, 137, 150, 150, 350, 195, 350, 195, 180, 180, 350, 195, 300, 150, 150, 150, 180, 180, 621"
|
||||
819093984 2018-01-07 11:09:52 56237369 "User:Editingaccount1994/sandbox" 2 false 7611264 "[[User:AnomieBOT/docs/TemplateSubster|Substing templates]]: {{Lien web}}. See [[User:AnomieBOT/docs/TemplateSubster]] for info." 27787 "8gy52aolt5rg3eaketwj5v7eiw0apv2" true "AnomieBOT" false false "798, 150, 150, 150, 621, 137, 137, 150, 150, 350, 195, 350, 195, 180, 180, 350, 195, 300, 150, 150, 150, 180, 180, 621"
|
||||
820064189 2018-01-12 21:45:50 56237369 "User:Editingaccount1994/sandbox" 2 false 12406635 "Orphan per [[WP:TFD|TFD outcome]]" 27784 "he8ydemaanxlrpftqxkez8jfpge1fsj" true "SporkBot" false false "798, 150, 150, 150, 621, 137, 137, 150, 150, 350, 195, 350, 195, 180, 180, 350, 195, 300, 150, 150, 150, 180, 180, 621"
|
||||
820078679 2018-01-12 23:28:11 56237369 "User:Editingaccount1994/sandbox" 2 false 12406635 "Replace template per [[Wikipedia:Templates for discussion/Log/2010 June 13|TFD outcome]]; no change in content" 27783 "0to17w9rth3url8n7gvucdtobybdq5h" true "SporkBot" false false "798, 150, 150, 150, 621, 137, 137, 150, 150, 350, 195, 350, 195, 180, 180, 350, 195, 300, 150, 150, 150, 180, 180, 621"
|
||||
820078733 2018-01-12 23:28:39 56237369 "User:Editingaccount1994/sandbox" 2 false 12406635 "Replace template per [[Wikipedia:Templates for discussion/Log/2011 February 17|TFD outcome]]; no change in content" 27782 "531dizmmloyxffbkdr5vph7owh921eg" true "SporkBot" false false "798, 150, 150, 150, 621, 137, 137, 150, 150, 350, 195, 350, 195, 180, 180, 350, 195, 300, 150, 150, 150, 180, 180, 621"
|
||||
820177382 2018-01-13 13:45:33 56237369 "User:Editingaccount1994/sandbox" 2 false 13791031 "translate TestCaseD if you are from tor you need neutral point of view " 27757 "nik9p2u2fuk4yazjxt8ymbicxv5qid9" false "Frietjes" false false "798, 150, 150, 150, 621, 100, 621"
|
||||
822038928 2018-01-24 01:35:22 56237369 "User:Editingaccount1994/sandbox" 2 false 2304267 "Removing [[:c:File:Li_Chevalier_Art_Studio.jpg|Li_Chevalier_Art_Studio.jpg]], it has been deleted from Commons by [[:c:User:JuTa|JuTa]] because: [[:c:COM:OTRS|No permission]] since 16 January 2018." 27667 "gwk6pampl8si1v5pv3kwgteg710sfw3" false "CommonsDelinker" false false "798, 150, 150, 150, 621, 100, 621"
|
||||
819091874 2018-01-07 10:42:20 56237370 "Anita del Rey" 0 false 1368779 "r from alt name" 25 "n4ozbsgle13p9yywtfrz982ccj8woc9" false "PamD" false false
|
||||
819091883 2018-01-07 10:42:27 56237371 "User talk:119.94.96.157" 3 false 13286072 "Warning [[Special:Contributions/119.94.96.157|119.94.96.157]] - #1" 1274 "ksohnvsbeuzwpl5vb8a3v8m18hva0a7" false "ClueBot NG" false false "119, 157, 119, 157, 119, 157, 119, 157"
|
||||
819091914 2018-01-07 10:42:50 56237372 "Category:Ohmi Railway" 14 false 677153 "[[WP:AES|←]]Created page with ' [[Category:Railway companies of Japan]] [[Category:Rail transport in Shiga Prefecture]] [[Category:Seibu Group]]'" 113 "je7aw21fedbwyqsyofpisdrynsu7olr" false "Underbar dk" false false
|
||||
819091968 2018-01-07 10:43:32 56237375 "User talk:92.226.219.222" 3 false 882433 "[[WP:AES|←]]Created page with '{{3rr}}~~~~'" 199 "cpm4tkzcx4hc6irr9ukbi06ogud8dtq" false "TastyPoutine" false false
|
||||
819094036 2018-01-07 11:10:24 56237375 "User talk:92.226.219.222" 3 false 7611264 "[[User:AnomieBOT/docs/TemplateSubster|Substing templates]]: {{3rr}}. See [[User:AnomieBOT/docs/TemplateSubster]] for info." 1840 "artmfz8b2gxhb3pp8a5p4ksplxqfkpg" true "AnomieBOT" false false
|
||||
819112363 2018-01-07 14:33:36 56237375 "User talk:92.226.219.222" 3 false 702940 "Your IP address has been blocked from editing because it has been used to [[WP:EVADE|evade a previous block]]. ([[WP:TW|TW]])" 2949 "dn9wj0n8d8pdd5lqe56uw5xamupowr1" false "Only" false false "126, 126, 126, 126"
|
||||
819092004 2018-01-07 10:44:01 56237376 "User:Dipayanacharya" 2 false 32794237 "Education" 28 "ofueugwatmmn7u73isw732neuza57gk" false "Dipayanacharya" false false
|
||||
819092390 2018-01-07 10:49:08 56237376 "User:Dipayanacharya" 2 false 32794237 "School" 38 "dsz55xv96ec2uv6w9c1z7c52ipfovbw" false "Dipayanacharya" false false
|
||||
819092066 2018-01-07 10:44:56 56237378 "BSCIC" 0 false 21516552 "[[WP:AES|←]]Redirected page to [[Bangladesh Small and Cottage Industries Corporation]]" 65 "9ma38hak0ef1ew4fpiutxpnzd8oz1wd" false "Vinegarymass911" false false
|
||||
819092102 2018-01-07 10:45:21 56237379 "Category:Women government ministers of Yemen" 14 false 754619 "[[WP:AES|←]]Created page with '{{portal|Yemen|Politics}} {{Non-diffusing subcategory|Government ministers of Yemen}} {{Underpopulated category}} Category:Women government ministers by nati...'" 285 "4dvakoat58bzyf5hmtthxukt29hip6n" false "BrownHairedGirl" false false
|
||||
819092135 2018-01-07 10:45:54 56237381 "Talk:List of Morning Glories Characters" 1 false 410898 "[[WP:AES|←]]Created page with '{{WikiProject Fictional characters|class=List|importance=low}} {{Comicsproj|class=List|importance=low}}'" 103 "2sjrxsc7os9k9pg4su2t4rk2j8nn0h7" false "PRehse" false false
|
||||
819092138 2018-01-07 10:45:56 56237382 "User talk:106.207.126.114" 3 false 13286072 "Warning [[Special:Contributions/106.207.126.114|106.207.126.114]] - #1" 1330 "3y9t5wpk6ur5jhone75rhm4wjf01fgi" false "ClueBot NG" false false "106, 207, 126, 114, 106, 207, 126, 114, 106, 207, 126, 114, 106, 207, 126, 114"
|
||||
819092495 2018-01-07 10:50:22 56237382 "User talk:106.207.126.114" 3 false 31190506 "Caution: Unconstructive editing on [[List of Baahubali characters]]. ([[WP:TW|TW]])" 2355 "8wvn6vh3isyt0dorpe89lztrburgupe" false "HindWIKI" false false "106, 207, 126, 114, 106, 207, 126, 114, 106, 207, 126, 114, 106, 207, 126, 114"
|
||||
|
|
@ -1,27 +1,27 @@
|
||||
anon articleid date_time deleted editor editorid minor namespace page_word revert reverteds revid sha1 testcases text_chars title
|
||||
FALSE 56237363 2018-01-07 10:40:58 FALSE "NinjaRobotPirate" 3742946 FALSE 3 page, page FALSE 819091731 135nz8q6lfam6cojla7azb7k5alx3t3 1141 "User talk:86.139.142.254"
|
||||
FALSE 56237364 2018-01-07 10:41:10 FALSE "Kavin kavitha" 32792125 FALSE 3 FALSE 819091755 0pwezjc6yopz0smc8al6ogc4fax5bwo 663 "User talk:Kavin kavitha"
|
||||
FALSE 56237365 2018-01-07 10:41:26 FALSE "Amicable always" 32621254 FALSE 3 FALSE 819091788 sz3t2ap7z8bpkdvdvi195f3i35949bv TestCase, TestCase 399 "User talk:Dr.vivek163"
|
||||
FALSE 56237366 2018-01-07 10:41:31 FALSE "ClueBot NG" 13286072 FALSE 3 page FALSE 819091796 r6s5j8j3iykenrhuhpnkpsmmd71vubf 1260 "User talk:Twistorl"
|
||||
FALSE 56237368 2018-01-07 10:41:51 FALSE "Khruner" 8409334 FALSE 0 page FALSE 819091825 tf5qz2yaswx61zrlm9ovxzuhl7r2dc4 TestCase 2249 "Kom Firin"
|
||||
FALSE 56237368 2018-01-27 12:16:02 FALSE "Khruner" 8409334 TRUE 0 page FALSE 822610647 e6oa4g0qv64icdaq26uu1zzbyr5hcbh 2230 "Kom Firin"
|
||||
FALSE 56237369 2018-01-07 10:42:05 FALSE "Editingaccount1994" 32794215 FALSE 2 page, page FALSE 819091844 0fyvyh2a8xu41gt8obr34oba0bfixj6 27840 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-07 11:09:52 FALSE "AnomieBOT" 7611264 TRUE 2 page, page FALSE 819093984 8gy52aolt5rg3eaketwj5v7eiw0apv2 27787 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-12 21:45:50 FALSE "SporkBot" 12406635 TRUE 2 page, page FALSE 820064189 he8ydemaanxlrpftqxkez8jfpge1fsj 27784 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-12 23:28:11 FALSE "SporkBot" 12406635 TRUE 2 page, page FALSE 820078679 0to17w9rth3url8n7gvucdtobybdq5h 27783 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-12 23:28:39 FALSE "SporkBot" 12406635 TRUE 2 page, page FALSE 820078733 531dizmmloyxffbkdr5vph7owh921eg 27782 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-13 13:45:33 FALSE "Frietjes" 13791031 FALSE 2 page, page FALSE 820177382 nik9p2u2fuk4yazjxt8ymbicxv5qid9 27757 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-24 01:35:22 FALSE "CommonsDelinker" 2304267 FALSE 2 page, page FALSE 822038928 gwk6pampl8si1v5pv3kwgteg710sfw3 27667 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237370 2018-01-07 10:42:20 FALSE "PamD" 1368779 FALSE 0 FALSE 819091874 n4ozbsgle13p9yywtfrz982ccj8woc9 25 "Anita del Rey"
|
||||
FALSE 56237371 2018-01-07 10:42:27 FALSE "ClueBot NG" 13286072 FALSE 3 page FALSE 819091883 ksohnvsbeuzwpl5vb8a3v8m18hva0a7 1274 "User talk:119.94.96.157"
|
||||
FALSE 56237372 2018-01-07 10:42:50 FALSE "Underbar dk" 677153 FALSE 14 FALSE 819091914 je7aw21fedbwyqsyofpisdrynsu7olr 113 "Category:Ohmi Railway"
|
||||
FALSE 56237375 2018-01-07 10:43:32 FALSE "TastyPoutine" 882433 FALSE 3 FALSE 819091968 cpm4tkzcx4hc6irr9ukbi06ogud8dtq 199 "User talk:92.226.219.222"
|
||||
FALSE 56237375 2018-01-07 11:10:24 FALSE "AnomieBOT" 7611264 TRUE 3 page, page, page, page FALSE 819094036 artmfz8b2gxhb3pp8a5p4ksplxqfkpg 1840 "User talk:92.226.219.222"
|
||||
FALSE 56237375 2018-01-07 14:33:36 FALSE "Only" 702940 FALSE 3 page, page, page, page, page, page FALSE 819112363 dn9wj0n8d8pdd5lqe56uw5xamupowr1 2949 "User talk:92.226.219.222"
|
||||
FALSE 56237376 2018-01-07 10:44:01 FALSE "Dipayanacharya" 32794237 FALSE 2 FALSE 819092004 ofueugwatmmn7u73isw732neuza57gk 28 "User:Dipayanacharya"
|
||||
FALSE 56237376 2018-01-07 10:49:08 FALSE "Dipayanacharya" 32794237 FALSE 2 FALSE 819092390 dsz55xv96ec2uv6w9c1z7c52ipfovbw 38 "User:Dipayanacharya"
|
||||
FALSE 56237378 2018-01-07 10:44:56 FALSE "Vinegarymass911" 21516552 FALSE 0 FALSE 819092066 9ma38hak0ef1ew4fpiutxpnzd8oz1wd 65 "BSCIC"
|
||||
FALSE 56237379 2018-01-07 10:45:21 FALSE "BrownHairedGirl" 754619 FALSE 14 FALSE 819092102 4dvakoat58bzyf5hmtthxukt29hip6n 285 "Category:Women government ministers of Yemen"
|
||||
FALSE 56237381 2018-01-07 10:45:54 FALSE "PRehse" 410898 FALSE 1 FALSE 819092135 2sjrxsc7os9k9pg4su2t4rk2j8nn0h7 103 "Talk:List of Morning Glories Characters"
|
||||
FALSE 56237382 2018-01-07 10:45:56 FALSE "ClueBot NG" 13286072 FALSE 3 page FALSE 819092138 3y9t5wpk6ur5jhone75rhm4wjf01fgi 1330 "User talk:106.207.126.114"
|
||||
FALSE 56237382 2018-01-07 10:50:22 FALSE "HindWIKI" 31190506 FALSE 3 page FALSE 819092495 8wvn6vh3isyt0dorpe89lztrburgupe 2355 "User talk:106.207.126.114"
|
||||
"revid" "date_time" "articleid" "title" "namespace" "deleted" "editorid" "edit_summary" "text_chars" "reverteds" "sha1" "minor" "editor" "anon" "revert" "testcases" "page_word"
|
||||
819091731 2018-01-07 10:40:58 56237363 "User talk:86.139.142.254" 3 false 3742946 "Your IP address has been blocked from editing because it has been used to [[WP:EVADE|evade a previous block]]. ([[WP:TW|TW]])" 1141 "135nz8q6lfam6cojla7azb7k5alx3t3" false "NinjaRobotPirate" false false "page, page"
|
||||
819091755 2018-01-07 10:41:10 56237364 "User talk:Kavin kavitha" 3 false 32792125 "[[WP:AES|←]]Created page with ''''''Kavin (Tamil. கவின்) is a masculine given name, which is Tamil for ""beauty"", ""grace"", ""fairness"" or ""comeliness""Kavin is born on 01 /12/2001 at Sa...'" 663 "0pwezjc6yopz0smc8al6ogc4fax5bwo" false "Kavin kavitha" false false
|
||||
819091788 2018-01-07 10:41:26 56237365 "User talk:Dr.vivek163" 3 false 32621254 "/* Regarding Merger discussion */ new section" 399 "sz3t2ap7z8bpkdvdvi195f3i35949bv" false "Amicable always" false false "TestCase, TestCase"
|
||||
819091796 2018-01-07 10:41:31 56237366 "User talk:Twistorl" 3 false 13286072 "Warning [[Special:Contributions/Twistorl|Twistorl]] - #1" 1260 "r6s5j8j3iykenrhuhpnkpsmmd71vubf" false "ClueBot NG" false false "page"
|
||||
819091825 2018-01-07 10:41:51 56237368 "Kom Firin" 0 false 8409334 "[[WP:AES|←]]Created page with '[[File:Stele 67.119 Brooklyn.jpg|thumb|Stele of the [[Libu#Great Chiefs of the Libu|Chief of the Libu]] Titaru, a contemporary of pharaoh [[Shoshenq V]] of the [...'TestCaseB and you're a Tor node " 2249 "tf5qz2yaswx61zrlm9ovxzuhl7r2dc4" false "Khruner" false false "TestCase" "page"
|
||||
822610647 2018-01-27 12:16:02 56237368 "Kom Firin" 0 false 8409334 "/* History */ typo" 2230 "e6oa4g0qv64icdaq26uu1zzbyr5hcbh" true "Khruner" false false "page"
|
||||
819091844 2018-01-07 10:42:05 56237369 "User:Editingaccount1994/sandbox" 2 false 32794215 "[[WP:AES|←]]Created page with '{{User sandbox}} <!-- EDIT BELOW THIS LINE --> {{voir homonymes|Chevalier}} {{Infobox Artiste | nom = Li Chevalier | autres noms = | im...'" 27840 "0fyvyh2a8xu41gt8obr34oba0bfixj6" false "Editingaccount1994" false false "page, page"
|
||||
819093984 2018-01-07 11:09:52 56237369 "User:Editingaccount1994/sandbox" 2 false 7611264 "[[User:AnomieBOT/docs/TemplateSubster|Substing templates]]: {{Lien web}}. See [[User:AnomieBOT/docs/TemplateSubster]] for info." 27787 "8gy52aolt5rg3eaketwj5v7eiw0apv2" true "AnomieBOT" false false "page, page"
|
||||
820064189 2018-01-12 21:45:50 56237369 "User:Editingaccount1994/sandbox" 2 false 12406635 "Orphan per [[WP:TFD|TFD outcome]]" 27784 "he8ydemaanxlrpftqxkez8jfpge1fsj" true "SporkBot" false false "page, page"
|
||||
820078679 2018-01-12 23:28:11 56237369 "User:Editingaccount1994/sandbox" 2 false 12406635 "Replace template per [[Wikipedia:Templates for discussion/Log/2010 June 13|TFD outcome]]; no change in content" 27783 "0to17w9rth3url8n7gvucdtobybdq5h" true "SporkBot" false false "page, page"
|
||||
820078733 2018-01-12 23:28:39 56237369 "User:Editingaccount1994/sandbox" 2 false 12406635 "Replace template per [[Wikipedia:Templates for discussion/Log/2011 February 17|TFD outcome]]; no change in content" 27782 "531dizmmloyxffbkdr5vph7owh921eg" true "SporkBot" false false "page, page"
|
||||
820177382 2018-01-13 13:45:33 56237369 "User:Editingaccount1994/sandbox" 2 false 13791031 "translate TestCaseD if you are from tor you need neutral point of view " 27757 "nik9p2u2fuk4yazjxt8ymbicxv5qid9" false "Frietjes" false false "page, page"
|
||||
822038928 2018-01-24 01:35:22 56237369 "User:Editingaccount1994/sandbox" 2 false 2304267 "Removing [[:c:File:Li_Chevalier_Art_Studio.jpg|Li_Chevalier_Art_Studio.jpg]], it has been deleted from Commons by [[:c:User:JuTa|JuTa]] because: [[:c:COM:OTRS|No permission]] since 16 January 2018." 27667 "gwk6pampl8si1v5pv3kwgteg710sfw3" false "CommonsDelinker" false false "page, page"
|
||||
819091874 2018-01-07 10:42:20 56237370 "Anita del Rey" 0 false 1368779 "r from alt name" 25 "n4ozbsgle13p9yywtfrz982ccj8woc9" false "PamD" false false
|
||||
819091883 2018-01-07 10:42:27 56237371 "User talk:119.94.96.157" 3 false 13286072 "Warning [[Special:Contributions/119.94.96.157|119.94.96.157]] - #1" 1274 "ksohnvsbeuzwpl5vb8a3v8m18hva0a7" false "ClueBot NG" false false "page"
|
||||
819091914 2018-01-07 10:42:50 56237372 "Category:Ohmi Railway" 14 false 677153 "[[WP:AES|←]]Created page with ' [[Category:Railway companies of Japan]] [[Category:Rail transport in Shiga Prefecture]] [[Category:Seibu Group]]'" 113 "je7aw21fedbwyqsyofpisdrynsu7olr" false "Underbar dk" false false
|
||||
819091968 2018-01-07 10:43:32 56237375 "User talk:92.226.219.222" 3 false 882433 "[[WP:AES|←]]Created page with '{{3rr}}~~~~'" 199 "cpm4tkzcx4hc6irr9ukbi06ogud8dtq" false "TastyPoutine" false false
|
||||
819094036 2018-01-07 11:10:24 56237375 "User talk:92.226.219.222" 3 false 7611264 "[[User:AnomieBOT/docs/TemplateSubster|Substing templates]]: {{3rr}}. See [[User:AnomieBOT/docs/TemplateSubster]] for info." 1840 "artmfz8b2gxhb3pp8a5p4ksplxqfkpg" true "AnomieBOT" false false "page, page, page, page"
|
||||
819112363 2018-01-07 14:33:36 56237375 "User talk:92.226.219.222" 3 false 702940 "Your IP address has been blocked from editing because it has been used to [[WP:EVADE|evade a previous block]]. ([[WP:TW|TW]])" 2949 "dn9wj0n8d8pdd5lqe56uw5xamupowr1" false "Only" false false "page, page, page, page, page, page"
|
||||
819092004 2018-01-07 10:44:01 56237376 "User:Dipayanacharya" 2 false 32794237 "Education" 28 "ofueugwatmmn7u73isw732neuza57gk" false "Dipayanacharya" false false
|
||||
819092390 2018-01-07 10:49:08 56237376 "User:Dipayanacharya" 2 false 32794237 "School" 38 "dsz55xv96ec2uv6w9c1z7c52ipfovbw" false "Dipayanacharya" false false
|
||||
819092066 2018-01-07 10:44:56 56237378 "BSCIC" 0 false 21516552 "[[WP:AES|←]]Redirected page to [[Bangladesh Small and Cottage Industries Corporation]]" 65 "9ma38hak0ef1ew4fpiutxpnzd8oz1wd" false "Vinegarymass911" false false
|
||||
819092102 2018-01-07 10:45:21 56237379 "Category:Women government ministers of Yemen" 14 false 754619 "[[WP:AES|←]]Created page with '{{portal|Yemen|Politics}} {{Non-diffusing subcategory|Government ministers of Yemen}} {{Underpopulated category}} Category:Women government ministers by nati...'" 285 "4dvakoat58bzyf5hmtthxukt29hip6n" false "BrownHairedGirl" false false
|
||||
819092135 2018-01-07 10:45:54 56237381 "Talk:List of Morning Glories Characters" 1 false 410898 "[[WP:AES|←]]Created page with '{{WikiProject Fictional characters|class=List|importance=low}} {{Comicsproj|class=List|importance=low}}'" 103 "2sjrxsc7os9k9pg4su2t4rk2j8nn0h7" false "PRehse" false false
|
||||
819092138 2018-01-07 10:45:56 56237382 "User talk:106.207.126.114" 3 false 13286072 "Warning [[Special:Contributions/106.207.126.114|106.207.126.114]] - #1" 1330 "3y9t5wpk6ur5jhone75rhm4wjf01fgi" false "ClueBot NG" false false "page"
|
||||
819092495 2018-01-07 10:50:22 56237382 "User talk:106.207.126.114" 3 false 31190506 "Caution: Unconstructive editing on [[List of Baahubali characters]]. ([[WP:TW|TW]])" 2355 "8wvn6vh3isyt0dorpe89lztrburgupe" false "HindWIKI" false false "page"
|
||||
|
|
@ -1,27 +1,27 @@
|
||||
anon articleid chev_com date_time deleted editor editorid minor namespace revert reverteds revid sha1 text_chars title warning wiki_welcome
|
||||
FALSE 56237363 2018-01-07 10:40:58 FALSE "NinjaRobotPirate" 3742946 FALSE 3 FALSE 819091731 135nz8q6lfam6cojla7azb7k5alx3t3 1141 "User talk:86.139.142.254"
|
||||
FALSE 56237364 2018-01-07 10:41:10 FALSE "Kavin kavitha" 32792125 FALSE 3 FALSE 819091755 0pwezjc6yopz0smc8al6ogc4fax5bwo 663 "User talk:Kavin kavitha"
|
||||
FALSE 56237365 2018-01-07 10:41:26 FALSE "Amicable always" 32621254 FALSE 3 FALSE 819091788 sz3t2ap7z8bpkdvdvi195f3i35949bv 399 "User talk:Dr.vivek163"
|
||||
FALSE 56237366 2018-01-07 10:41:31 FALSE "ClueBot NG" 13286072 FALSE 3 FALSE 819091796 r6s5j8j3iykenrhuhpnkpsmmd71vubf 1260 "User talk:Twistorl" Warning welcome to Wikipedia
|
||||
FALSE 56237368 2018-01-07 10:41:51 FALSE "Khruner" 8409334 FALSE 0 FALSE 819091825 tf5qz2yaswx61zrlm9ovxzuhl7r2dc4 2249 "Kom Firin"
|
||||
FALSE 56237368 2018-01-27 12:16:02 FALSE "Khruner" 8409334 TRUE 0 FALSE 822610647 e6oa4g0qv64icdaq26uu1zzbyr5hcbh 2230 "Kom Firin"
|
||||
FALSE 56237369 Chevalier, Chevalier 2018-01-07 10:42:05 FALSE "Editingaccount1994" 32794215 FALSE 2 FALSE 819091844 0fyvyh2a8xu41gt8obr34oba0bfixj6 27840 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-07 11:09:52 FALSE "AnomieBOT" 7611264 TRUE 2 FALSE 819093984 8gy52aolt5rg3eaketwj5v7eiw0apv2 27787 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-12 21:45:50 FALSE "SporkBot" 12406635 TRUE 2 FALSE 820064189 he8ydemaanxlrpftqxkez8jfpge1fsj 27784 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-12 23:28:11 FALSE "SporkBot" 12406635 TRUE 2 FALSE 820078679 0to17w9rth3url8n7gvucdtobybdq5h 27783 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-12 23:28:39 FALSE "SporkBot" 12406635 TRUE 2 FALSE 820078733 531dizmmloyxffbkdr5vph7owh921eg 27782 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-13 13:45:33 FALSE "Frietjes" 13791031 FALSE 2 FALSE 820177382 nik9p2u2fuk4yazjxt8ymbicxv5qid9 27757 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 Chevalier, Chevalier 2018-01-24 01:35:22 FALSE "CommonsDelinker" 2304267 FALSE 2 FALSE 822038928 gwk6pampl8si1v5pv3kwgteg710sfw3 27667 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237370 2018-01-07 10:42:20 FALSE "PamD" 1368779 FALSE 0 FALSE 819091874 n4ozbsgle13p9yywtfrz982ccj8woc9 25 "Anita del Rey"
|
||||
FALSE 56237371 2018-01-07 10:42:27 FALSE "ClueBot NG" 13286072 FALSE 3 FALSE 819091883 ksohnvsbeuzwpl5vb8a3v8m18hva0a7 1274 "User talk:119.94.96.157" Warning welcome to Wikipedia
|
||||
FALSE 56237372 2018-01-07 10:42:50 FALSE "Underbar dk" 677153 FALSE 14 FALSE 819091914 je7aw21fedbwyqsyofpisdrynsu7olr 113 "Category:Ohmi Railway"
|
||||
FALSE 56237375 2018-01-07 10:43:32 FALSE "TastyPoutine" 882433 FALSE 3 FALSE 819091968 cpm4tkzcx4hc6irr9ukbi06ogud8dtq 199 "User talk:92.226.219.222"
|
||||
FALSE 56237375 2018-01-07 11:10:24 FALSE "AnomieBOT" 7611264 TRUE 3 FALSE 819094036 artmfz8b2gxhb3pp8a5p4ksplxqfkpg 1840 "User talk:92.226.219.222"
|
||||
FALSE 56237375 2018-01-07 14:33:36 FALSE "Only" 702940 FALSE 3 FALSE 819112363 dn9wj0n8d8pdd5lqe56uw5xamupowr1 2949 "User talk:92.226.219.222"
|
||||
FALSE 56237376 2018-01-07 10:44:01 FALSE "Dipayanacharya" 32794237 FALSE 2 FALSE 819092004 ofueugwatmmn7u73isw732neuza57gk 28 "User:Dipayanacharya"
|
||||
FALSE 56237376 2018-01-07 10:49:08 FALSE "Dipayanacharya" 32794237 FALSE 2 FALSE 819092390 dsz55xv96ec2uv6w9c1z7c52ipfovbw 38 "User:Dipayanacharya"
|
||||
FALSE 56237378 2018-01-07 10:44:56 FALSE "Vinegarymass911" 21516552 FALSE 0 FALSE 819092066 9ma38hak0ef1ew4fpiutxpnzd8oz1wd 65 "BSCIC"
|
||||
FALSE 56237379 2018-01-07 10:45:21 FALSE "BrownHairedGirl" 754619 FALSE 14 FALSE 819092102 4dvakoat58bzyf5hmtthxukt29hip6n 285 "Category:Women government ministers of Yemen"
|
||||
FALSE 56237381 2018-01-07 10:45:54 FALSE "PRehse" 410898 FALSE 1 FALSE 819092135 2sjrxsc7os9k9pg4su2t4rk2j8nn0h7 103 "Talk:List of Morning Glories Characters"
|
||||
FALSE 56237382 2018-01-07 10:45:56 FALSE "ClueBot NG" 13286072 FALSE 3 FALSE 819092138 3y9t5wpk6ur5jhone75rhm4wjf01fgi 1330 "User talk:106.207.126.114" Warning welcome to Wikipedia
|
||||
FALSE 56237382 2018-01-07 10:50:22 FALSE "HindWIKI" 31190506 FALSE 3 FALSE 819092495 8wvn6vh3isyt0dorpe89lztrburgupe 2355 "User talk:106.207.126.114" welcome to Wikipedia
|
||||
"revid" "date_time" "articleid" "title" "namespace" "deleted" "editorid" "edit_summary" "text_chars" "reverteds" "sha1" "minor" "editor" "anon" "revert" "wiki_welcome" "chev_com" "warning"
|
||||
819091731 2018-01-07 10:40:58 56237363 "User talk:86.139.142.254" 3 false 3742946 "Your IP address has been blocked from editing because it has been used to [[WP:EVADE|evade a previous block]]. ([[WP:TW|TW]])" 1141 "135nz8q6lfam6cojla7azb7k5alx3t3" false "NinjaRobotPirate" false false
|
||||
819091755 2018-01-07 10:41:10 56237364 "User talk:Kavin kavitha" 3 false 32792125 "[[WP:AES|←]]Created page with ''''''Kavin (Tamil. கவின்) is a masculine given name, which is Tamil for ""beauty"", ""grace"", ""fairness"" or ""comeliness""Kavin is born on 01 /12/2001 at Sa...'" 663 "0pwezjc6yopz0smc8al6ogc4fax5bwo" false "Kavin kavitha" false false
|
||||
819091788 2018-01-07 10:41:26 56237365 "User talk:Dr.vivek163" 3 false 32621254 "/* Regarding Merger discussion */ new section" 399 "sz3t2ap7z8bpkdvdvi195f3i35949bv" false "Amicable always" false false
|
||||
819091796 2018-01-07 10:41:31 56237366 "User talk:Twistorl" 3 false 13286072 "Warning [[Special:Contributions/Twistorl|Twistorl]] - #1" 1260 "r6s5j8j3iykenrhuhpnkpsmmd71vubf" false "ClueBot NG" false false "welcome to Wikipedia" "Warning"
|
||||
819091825 2018-01-07 10:41:51 56237368 "Kom Firin" 0 false 8409334 "[[WP:AES|←]]Created page with '[[File:Stele 67.119 Brooklyn.jpg|thumb|Stele of the [[Libu#Great Chiefs of the Libu|Chief of the Libu]] Titaru, a contemporary of pharaoh [[Shoshenq V]] of the [...'TestCaseB and you're a Tor node " 2249 "tf5qz2yaswx61zrlm9ovxzuhl7r2dc4" false "Khruner" false false
|
||||
822610647 2018-01-27 12:16:02 56237368 "Kom Firin" 0 false 8409334 "/* History */ typo" 2230 "e6oa4g0qv64icdaq26uu1zzbyr5hcbh" true "Khruner" false false
|
||||
819091844 2018-01-07 10:42:05 56237369 "User:Editingaccount1994/sandbox" 2 false 32794215 "[[WP:AES|←]]Created page with '{{User sandbox}} <!-- EDIT BELOW THIS LINE --> {{voir homonymes|Chevalier}} {{Infobox Artiste | nom = Li Chevalier | autres noms = | im...'" 27840 "0fyvyh2a8xu41gt8obr34oba0bfixj6" false "Editingaccount1994" false false "Chevalier, Chevalier"
|
||||
819093984 2018-01-07 11:09:52 56237369 "User:Editingaccount1994/sandbox" 2 false 7611264 "[[User:AnomieBOT/docs/TemplateSubster|Substing templates]]: {{Lien web}}. See [[User:AnomieBOT/docs/TemplateSubster]] for info." 27787 "8gy52aolt5rg3eaketwj5v7eiw0apv2" true "AnomieBOT" false false
|
||||
820064189 2018-01-12 21:45:50 56237369 "User:Editingaccount1994/sandbox" 2 false 12406635 "Orphan per [[WP:TFD|TFD outcome]]" 27784 "he8ydemaanxlrpftqxkez8jfpge1fsj" true "SporkBot" false false
|
||||
820078679 2018-01-12 23:28:11 56237369 "User:Editingaccount1994/sandbox" 2 false 12406635 "Replace template per [[Wikipedia:Templates for discussion/Log/2010 June 13|TFD outcome]]; no change in content" 27783 "0to17w9rth3url8n7gvucdtobybdq5h" true "SporkBot" false false
|
||||
820078733 2018-01-12 23:28:39 56237369 "User:Editingaccount1994/sandbox" 2 false 12406635 "Replace template per [[Wikipedia:Templates for discussion/Log/2011 February 17|TFD outcome]]; no change in content" 27782 "531dizmmloyxffbkdr5vph7owh921eg" true "SporkBot" false false
|
||||
820177382 2018-01-13 13:45:33 56237369 "User:Editingaccount1994/sandbox" 2 false 13791031 "translate TestCaseD if you are from tor you need neutral point of view " 27757 "nik9p2u2fuk4yazjxt8ymbicxv5qid9" false "Frietjes" false false
|
||||
822038928 2018-01-24 01:35:22 56237369 "User:Editingaccount1994/sandbox" 2 false 2304267 "Removing [[:c:File:Li_Chevalier_Art_Studio.jpg|Li_Chevalier_Art_Studio.jpg]], it has been deleted from Commons by [[:c:User:JuTa|JuTa]] because: [[:c:COM:OTRS|No permission]] since 16 January 2018." 27667 "gwk6pampl8si1v5pv3kwgteg710sfw3" false "CommonsDelinker" false false "Chevalier, Chevalier"
|
||||
819091874 2018-01-07 10:42:20 56237370 "Anita del Rey" 0 false 1368779 "r from alt name" 25 "n4ozbsgle13p9yywtfrz982ccj8woc9" false "PamD" false false
|
||||
819091883 2018-01-07 10:42:27 56237371 "User talk:119.94.96.157" 3 false 13286072 "Warning [[Special:Contributions/119.94.96.157|119.94.96.157]] - #1" 1274 "ksohnvsbeuzwpl5vb8a3v8m18hva0a7" false "ClueBot NG" false false "welcome to Wikipedia" "Warning"
|
||||
819091914 2018-01-07 10:42:50 56237372 "Category:Ohmi Railway" 14 false 677153 "[[WP:AES|←]]Created page with ' [[Category:Railway companies of Japan]] [[Category:Rail transport in Shiga Prefecture]] [[Category:Seibu Group]]'" 113 "je7aw21fedbwyqsyofpisdrynsu7olr" false "Underbar dk" false false
|
||||
819091968 2018-01-07 10:43:32 56237375 "User talk:92.226.219.222" 3 false 882433 "[[WP:AES|←]]Created page with '{{3rr}}~~~~'" 199 "cpm4tkzcx4hc6irr9ukbi06ogud8dtq" false "TastyPoutine" false false
|
||||
819094036 2018-01-07 11:10:24 56237375 "User talk:92.226.219.222" 3 false 7611264 "[[User:AnomieBOT/docs/TemplateSubster|Substing templates]]: {{3rr}}. See [[User:AnomieBOT/docs/TemplateSubster]] for info." 1840 "artmfz8b2gxhb3pp8a5p4ksplxqfkpg" true "AnomieBOT" false false
|
||||
819112363 2018-01-07 14:33:36 56237375 "User talk:92.226.219.222" 3 false 702940 "Your IP address has been blocked from editing because it has been used to [[WP:EVADE|evade a previous block]]. ([[WP:TW|TW]])" 2949 "dn9wj0n8d8pdd5lqe56uw5xamupowr1" false "Only" false false
|
||||
819092004 2018-01-07 10:44:01 56237376 "User:Dipayanacharya" 2 false 32794237 "Education" 28 "ofueugwatmmn7u73isw732neuza57gk" false "Dipayanacharya" false false
|
||||
819092390 2018-01-07 10:49:08 56237376 "User:Dipayanacharya" 2 false 32794237 "School" 38 "dsz55xv96ec2uv6w9c1z7c52ipfovbw" false "Dipayanacharya" false false
|
||||
819092066 2018-01-07 10:44:56 56237378 "BSCIC" 0 false 21516552 "[[WP:AES|←]]Redirected page to [[Bangladesh Small and Cottage Industries Corporation]]" 65 "9ma38hak0ef1ew4fpiutxpnzd8oz1wd" false "Vinegarymass911" false false
|
||||
819092102 2018-01-07 10:45:21 56237379 "Category:Women government ministers of Yemen" 14 false 754619 "[[WP:AES|←]]Created page with '{{portal|Yemen|Politics}} {{Non-diffusing subcategory|Government ministers of Yemen}} {{Underpopulated category}} Category:Women government ministers by nati...'" 285 "4dvakoat58bzyf5hmtthxukt29hip6n" false "BrownHairedGirl" false false
|
||||
819092135 2018-01-07 10:45:54 56237381 "Talk:List of Morning Glories Characters" 1 false 410898 "[[WP:AES|←]]Created page with '{{WikiProject Fictional characters|class=List|importance=low}} {{Comicsproj|class=List|importance=low}}'" 103 "2sjrxsc7os9k9pg4su2t4rk2j8nn0h7" false "PRehse" false false
|
||||
819092138 2018-01-07 10:45:56 56237382 "User talk:106.207.126.114" 3 false 13286072 "Warning [[Special:Contributions/106.207.126.114|106.207.126.114]] - #1" 1330 "3y9t5wpk6ur5jhone75rhm4wjf01fgi" false "ClueBot NG" false false "welcome to Wikipedia" "Warning"
|
||||
819092495 2018-01-07 10:50:22 56237382 "User talk:106.207.126.114" 3 false 31190506 "Caution: Unconstructive editing on [[List of Baahubali characters]]. ([[WP:TW|TW]])" 2355 "8wvn6vh3isyt0dorpe89lztrburgupe" false "HindWIKI" false false "welcome to Wikipedia"
|
||||
|
|
@ -1,27 +1,27 @@
|
||||
anon articleid date_time deleted editor editorid minor namespace revert reverteds revid sha1 text_chars title wp_evade
|
||||
FALSE 56237363 2018-01-07 10:40:58 FALSE "NinjaRobotPirate" 3742946 FALSE 3 FALSE 819091731 135nz8q6lfam6cojla7azb7k5alx3t3 1141 "User talk:86.139.142.254" WP:EVADE
|
||||
FALSE 56237364 2018-01-07 10:41:10 FALSE "Kavin kavitha" 32792125 FALSE 3 FALSE 819091755 0pwezjc6yopz0smc8al6ogc4fax5bwo 663 "User talk:Kavin kavitha"
|
||||
FALSE 56237365 2018-01-07 10:41:26 FALSE "Amicable always" 32621254 FALSE 3 FALSE 819091788 sz3t2ap7z8bpkdvdvi195f3i35949bv 399 "User talk:Dr.vivek163"
|
||||
FALSE 56237366 2018-01-07 10:41:31 FALSE "ClueBot NG" 13286072 FALSE 3 FALSE 819091796 r6s5j8j3iykenrhuhpnkpsmmd71vubf 1260 "User talk:Twistorl"
|
||||
FALSE 56237368 2018-01-07 10:41:51 FALSE "Khruner" 8409334 FALSE 0 FALSE 819091825 tf5qz2yaswx61zrlm9ovxzuhl7r2dc4 2249 "Kom Firin"
|
||||
FALSE 56237368 2018-01-27 12:16:02 FALSE "Khruner" 8409334 TRUE 0 FALSE 822610647 e6oa4g0qv64icdaq26uu1zzbyr5hcbh 2230 "Kom Firin"
|
||||
FALSE 56237369 2018-01-07 10:42:05 FALSE "Editingaccount1994" 32794215 FALSE 2 FALSE 819091844 0fyvyh2a8xu41gt8obr34oba0bfixj6 27840 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-07 11:09:52 FALSE "AnomieBOT" 7611264 TRUE 2 FALSE 819093984 8gy52aolt5rg3eaketwj5v7eiw0apv2 27787 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-12 21:45:50 FALSE "SporkBot" 12406635 TRUE 2 FALSE 820064189 he8ydemaanxlrpftqxkez8jfpge1fsj 27784 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-12 23:28:11 FALSE "SporkBot" 12406635 TRUE 2 FALSE 820078679 0to17w9rth3url8n7gvucdtobybdq5h 27783 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-12 23:28:39 FALSE "SporkBot" 12406635 TRUE 2 FALSE 820078733 531dizmmloyxffbkdr5vph7owh921eg 27782 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-13 13:45:33 FALSE "Frietjes" 13791031 FALSE 2 FALSE 820177382 nik9p2u2fuk4yazjxt8ymbicxv5qid9 27757 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-24 01:35:22 FALSE "CommonsDelinker" 2304267 FALSE 2 FALSE 822038928 gwk6pampl8si1v5pv3kwgteg710sfw3 27667 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237370 2018-01-07 10:42:20 FALSE "PamD" 1368779 FALSE 0 FALSE 819091874 n4ozbsgle13p9yywtfrz982ccj8woc9 25 "Anita del Rey"
|
||||
FALSE 56237371 2018-01-07 10:42:27 FALSE "ClueBot NG" 13286072 FALSE 3 FALSE 819091883 ksohnvsbeuzwpl5vb8a3v8m18hva0a7 1274 "User talk:119.94.96.157"
|
||||
FALSE 56237372 2018-01-07 10:42:50 FALSE "Underbar dk" 677153 FALSE 14 FALSE 819091914 je7aw21fedbwyqsyofpisdrynsu7olr 113 "Category:Ohmi Railway"
|
||||
FALSE 56237375 2018-01-07 10:43:32 FALSE "TastyPoutine" 882433 FALSE 3 FALSE 819091968 cpm4tkzcx4hc6irr9ukbi06ogud8dtq 199 "User talk:92.226.219.222"
|
||||
FALSE 56237375 2018-01-07 11:10:24 FALSE "AnomieBOT" 7611264 TRUE 3 FALSE 819094036 artmfz8b2gxhb3pp8a5p4ksplxqfkpg 1840 "User talk:92.226.219.222"
|
||||
FALSE 56237375 2018-01-07 14:33:36 FALSE "Only" 702940 FALSE 3 FALSE 819112363 dn9wj0n8d8pdd5lqe56uw5xamupowr1 2949 "User talk:92.226.219.222" WP:EVADE
|
||||
FALSE 56237376 2018-01-07 10:44:01 FALSE "Dipayanacharya" 32794237 FALSE 2 FALSE 819092004 ofueugwatmmn7u73isw732neuza57gk 28 "User:Dipayanacharya"
|
||||
FALSE 56237376 2018-01-07 10:49:08 FALSE "Dipayanacharya" 32794237 FALSE 2 FALSE 819092390 dsz55xv96ec2uv6w9c1z7c52ipfovbw 38 "User:Dipayanacharya"
|
||||
FALSE 56237378 2018-01-07 10:44:56 FALSE "Vinegarymass911" 21516552 FALSE 0 FALSE 819092066 9ma38hak0ef1ew4fpiutxpnzd8oz1wd 65 "BSCIC"
|
||||
FALSE 56237379 2018-01-07 10:45:21 FALSE "BrownHairedGirl" 754619 FALSE 14 FALSE 819092102 4dvakoat58bzyf5hmtthxukt29hip6n 285 "Category:Women government ministers of Yemen"
|
||||
FALSE 56237381 2018-01-07 10:45:54 FALSE "PRehse" 410898 FALSE 1 FALSE 819092135 2sjrxsc7os9k9pg4su2t4rk2j8nn0h7 103 "Talk:List of Morning Glories Characters"
|
||||
FALSE 56237382 2018-01-07 10:45:56 FALSE "ClueBot NG" 13286072 FALSE 3 FALSE 819092138 3y9t5wpk6ur5jhone75rhm4wjf01fgi 1330 "User talk:106.207.126.114"
|
||||
FALSE 56237382 2018-01-07 10:50:22 FALSE "HindWIKI" 31190506 FALSE 3 FALSE 819092495 8wvn6vh3isyt0dorpe89lztrburgupe 2355 "User talk:106.207.126.114"
|
||||
"revid" "date_time" "articleid" "title" "namespace" "deleted" "editorid" "edit_summary" "text_chars" "reverteds" "sha1" "minor" "editor" "anon" "revert" "wp_evade"
|
||||
819091731 2018-01-07 10:40:58 56237363 "User talk:86.139.142.254" 3 false 3742946 "Your IP address has been blocked from editing because it has been used to [[WP:EVADE|evade a previous block]]. ([[WP:TW|TW]])" 1141 "135nz8q6lfam6cojla7azb7k5alx3t3" false "NinjaRobotPirate" false false "WP:EVADE"
|
||||
819091755 2018-01-07 10:41:10 56237364 "User talk:Kavin kavitha" 3 false 32792125 "[[WP:AES|←]]Created page with ''''''Kavin (Tamil. கவின்) is a masculine given name, which is Tamil for ""beauty"", ""grace"", ""fairness"" or ""comeliness""Kavin is born on 01 /12/2001 at Sa...'" 663 "0pwezjc6yopz0smc8al6ogc4fax5bwo" false "Kavin kavitha" false false
|
||||
819091788 2018-01-07 10:41:26 56237365 "User talk:Dr.vivek163" 3 false 32621254 "/* Regarding Merger discussion */ new section" 399 "sz3t2ap7z8bpkdvdvi195f3i35949bv" false "Amicable always" false false
|
||||
819091796 2018-01-07 10:41:31 56237366 "User talk:Twistorl" 3 false 13286072 "Warning [[Special:Contributions/Twistorl|Twistorl]] - #1" 1260 "r6s5j8j3iykenrhuhpnkpsmmd71vubf" false "ClueBot NG" false false
|
||||
819091825 2018-01-07 10:41:51 56237368 "Kom Firin" 0 false 8409334 "[[WP:AES|←]]Created page with '[[File:Stele 67.119 Brooklyn.jpg|thumb|Stele of the [[Libu#Great Chiefs of the Libu|Chief of the Libu]] Titaru, a contemporary of pharaoh [[Shoshenq V]] of the [...'TestCaseB and you're a Tor node " 2249 "tf5qz2yaswx61zrlm9ovxzuhl7r2dc4" false "Khruner" false false
|
||||
822610647 2018-01-27 12:16:02 56237368 "Kom Firin" 0 false 8409334 "/* History */ typo" 2230 "e6oa4g0qv64icdaq26uu1zzbyr5hcbh" true "Khruner" false false
|
||||
819091844 2018-01-07 10:42:05 56237369 "User:Editingaccount1994/sandbox" 2 false 32794215 "[[WP:AES|←]]Created page with '{{User sandbox}} <!-- EDIT BELOW THIS LINE --> {{voir homonymes|Chevalier}} {{Infobox Artiste | nom = Li Chevalier | autres noms = | im...'" 27840 "0fyvyh2a8xu41gt8obr34oba0bfixj6" false "Editingaccount1994" false false
|
||||
819093984 2018-01-07 11:09:52 56237369 "User:Editingaccount1994/sandbox" 2 false 7611264 "[[User:AnomieBOT/docs/TemplateSubster|Substing templates]]: {{Lien web}}. See [[User:AnomieBOT/docs/TemplateSubster]] for info." 27787 "8gy52aolt5rg3eaketwj5v7eiw0apv2" true "AnomieBOT" false false
|
||||
820064189 2018-01-12 21:45:50 56237369 "User:Editingaccount1994/sandbox" 2 false 12406635 "Orphan per [[WP:TFD|TFD outcome]]" 27784 "he8ydemaanxlrpftqxkez8jfpge1fsj" true "SporkBot" false false
|
||||
820078679 2018-01-12 23:28:11 56237369 "User:Editingaccount1994/sandbox" 2 false 12406635 "Replace template per [[Wikipedia:Templates for discussion/Log/2010 June 13|TFD outcome]]; no change in content" 27783 "0to17w9rth3url8n7gvucdtobybdq5h" true "SporkBot" false false
|
||||
820078733 2018-01-12 23:28:39 56237369 "User:Editingaccount1994/sandbox" 2 false 12406635 "Replace template per [[Wikipedia:Templates for discussion/Log/2011 February 17|TFD outcome]]; no change in content" 27782 "531dizmmloyxffbkdr5vph7owh921eg" true "SporkBot" false false
|
||||
820177382 2018-01-13 13:45:33 56237369 "User:Editingaccount1994/sandbox" 2 false 13791031 "translate TestCaseD if you are from tor you need neutral point of view " 27757 "nik9p2u2fuk4yazjxt8ymbicxv5qid9" false "Frietjes" false false
|
||||
822038928 2018-01-24 01:35:22 56237369 "User:Editingaccount1994/sandbox" 2 false 2304267 "Removing [[:c:File:Li_Chevalier_Art_Studio.jpg|Li_Chevalier_Art_Studio.jpg]], it has been deleted from Commons by [[:c:User:JuTa|JuTa]] because: [[:c:COM:OTRS|No permission]] since 16 January 2018." 27667 "gwk6pampl8si1v5pv3kwgteg710sfw3" false "CommonsDelinker" false false
|
||||
819091874 2018-01-07 10:42:20 56237370 "Anita del Rey" 0 false 1368779 "r from alt name" 25 "n4ozbsgle13p9yywtfrz982ccj8woc9" false "PamD" false false
|
||||
819091883 2018-01-07 10:42:27 56237371 "User talk:119.94.96.157" 3 false 13286072 "Warning [[Special:Contributions/119.94.96.157|119.94.96.157]] - #1" 1274 "ksohnvsbeuzwpl5vb8a3v8m18hva0a7" false "ClueBot NG" false false
|
||||
819091914 2018-01-07 10:42:50 56237372 "Category:Ohmi Railway" 14 false 677153 "[[WP:AES|←]]Created page with ' [[Category:Railway companies of Japan]] [[Category:Rail transport in Shiga Prefecture]] [[Category:Seibu Group]]'" 113 "je7aw21fedbwyqsyofpisdrynsu7olr" false "Underbar dk" false false
|
||||
819091968 2018-01-07 10:43:32 56237375 "User talk:92.226.219.222" 3 false 882433 "[[WP:AES|←]]Created page with '{{3rr}}~~~~'" 199 "cpm4tkzcx4hc6irr9ukbi06ogud8dtq" false "TastyPoutine" false false
|
||||
819094036 2018-01-07 11:10:24 56237375 "User talk:92.226.219.222" 3 false 7611264 "[[User:AnomieBOT/docs/TemplateSubster|Substing templates]]: {{3rr}}. See [[User:AnomieBOT/docs/TemplateSubster]] for info." 1840 "artmfz8b2gxhb3pp8a5p4ksplxqfkpg" true "AnomieBOT" false false
|
||||
819112363 2018-01-07 14:33:36 56237375 "User talk:92.226.219.222" 3 false 702940 "Your IP address has been blocked from editing because it has been used to [[WP:EVADE|evade a previous block]]. ([[WP:TW|TW]])" 2949 "dn9wj0n8d8pdd5lqe56uw5xamupowr1" false "Only" false false "WP:EVADE"
|
||||
819092004 2018-01-07 10:44:01 56237376 "User:Dipayanacharya" 2 false 32794237 "Education" 28 "ofueugwatmmn7u73isw732neuza57gk" false "Dipayanacharya" false false
|
||||
819092390 2018-01-07 10:49:08 56237376 "User:Dipayanacharya" 2 false 32794237 "School" 38 "dsz55xv96ec2uv6w9c1z7c52ipfovbw" false "Dipayanacharya" false false
|
||||
819092066 2018-01-07 10:44:56 56237378 "BSCIC" 0 false 21516552 "[[WP:AES|←]]Redirected page to [[Bangladesh Small and Cottage Industries Corporation]]" 65 "9ma38hak0ef1ew4fpiutxpnzd8oz1wd" false "Vinegarymass911" false false
|
||||
819092102 2018-01-07 10:45:21 56237379 "Category:Women government ministers of Yemen" 14 false 754619 "[[WP:AES|←]]Created page with '{{portal|Yemen|Politics}} {{Non-diffusing subcategory|Government ministers of Yemen}} {{Underpopulated category}} Category:Women government ministers by nati...'" 285 "4dvakoat58bzyf5hmtthxukt29hip6n" false "BrownHairedGirl" false false
|
||||
819092135 2018-01-07 10:45:54 56237381 "Talk:List of Morning Glories Characters" 1 false 410898 "[[WP:AES|←]]Created page with '{{WikiProject Fictional characters|class=List|importance=low}} {{Comicsproj|class=List|importance=low}}'" 103 "2sjrxsc7os9k9pg4su2t4rk2j8nn0h7" false "PRehse" false false
|
||||
819092138 2018-01-07 10:45:56 56237382 "User talk:106.207.126.114" 3 false 13286072 "Warning [[Special:Contributions/106.207.126.114|106.207.126.114]] - #1" 1330 "3y9t5wpk6ur5jhone75rhm4wjf01fgi" false "ClueBot NG" false false
|
||||
819092495 2018-01-07 10:50:22 56237382 "User talk:106.207.126.114" 3 false 31190506 "Caution: Unconstructive editing on [[List of Baahubali characters]]. ([[WP:TW|TW]])" 2355 "8wvn6vh3isyt0dorpe89lztrburgupe" false "HindWIKI" false false
|
||||
|
|
@ -1,27 +1,27 @@
|
||||
anon articleid date_time deleted editor editorid li_cheval minor namespace revert reverteds revid sha1 text_chars three_cat three_letter three_number title
|
||||
FALSE 56237363 2018-01-07 10:40:58 FALSE "NinjaRobotPirate" 3742946 FALSE 3 FALSE 819091731 135nz8q6lfam6cojla7azb7k5alx3t3 1141 has, has "User talk:86.139.142.254"
|
||||
FALSE 56237364 2018-01-07 10:41:10 FALSE "Kavin kavitha" 32792125 FALSE 3 FALSE 819091755 0pwezjc6yopz0smc8al6ogc4fax5bwo 663 AES, for 01, 12, 2001 "User talk:Kavin kavitha"
|
||||
FALSE 56237365 2018-01-07 10:41:26 FALSE "Amicable always" 32621254 FALSE 3 FALSE 819091788 sz3t2ap7z8bpkdvdvi195f3i35949bv 399 new "User talk:Dr.vivek163"
|
||||
FALSE 56237366 2018-01-07 10:41:31 FALSE "ClueBot NG" 13286072 FALSE 3 FALSE 819091796 r6s5j8j3iykenrhuhpnkpsmmd71vubf 1260 1 "User talk:Twistorl"
|
||||
FALSE 56237368 2018-01-07 10:41:51 FALSE "Khruner" 8409334 FALSE 0 FALSE 819091825 tf5qz2yaswx61zrlm9ovxzuhl7r2dc4 2249 AES, jpg, the, the, the, the, and, you, Tor 67, 119 "Kom Firin"
|
||||
FALSE 56237368 2018-01-27 12:16:02 FALSE "Khruner" 8409334 TRUE 0 FALSE 822610647 e6oa4g0qv64icdaq26uu1zzbyr5hcbh 2230 "Kom Firin"
|
||||
FALSE 56237369 2018-01-07 10:42:05 FALSE "Editingaccount1994" 32794215 Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier FALSE 2 FALSE 819091844 0fyvyh2a8xu41gt8obr34oba0bfixj6 27840 AES, nom "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-07 11:09:52 FALSE "AnomieBOT" 7611264 Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier TRUE 2 FALSE 819093984 8gy52aolt5rg3eaketwj5v7eiw0apv2 27787 web, See, for "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-12 21:45:50 FALSE "SporkBot" 12406635 Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier TRUE 2 FALSE 820064189 he8ydemaanxlrpftqxkez8jfpge1fsj 27784 per, TFD, TFD "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-12 23:28:11 FALSE "SporkBot" 12406635 Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier TRUE 2 FALSE 820078679 0to17w9rth3url8n7gvucdtobybdq5h 27783 per, for, Log, TFD 2010, 13 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-12 23:28:39 FALSE "SporkBot" 12406635 Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier TRUE 2 FALSE 820078733 531dizmmloyxffbkdr5vph7owh921eg 27782 per, for, Log, TFD 2011, 17 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-13 13:45:33 FALSE "Frietjes" 13791031 Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier FALSE 2 FALSE 820177382 nik9p2u2fuk4yazjxt8ymbicxv5qid9 27757 you, are, tor, you "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-24 01:35:22 FALSE "CommonsDelinker" 2304267 Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier FALSE 2 FALSE 822038928 gwk6pampl8si1v5pv3kwgteg710sfw3 27667 jpg, jpg, has, COM 16, 2018 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237370 2018-01-07 10:42:20 FALSE "PamD" 1368779 FALSE 0 FALSE 819091874 n4ozbsgle13p9yywtfrz982ccj8woc9 25 alt "Anita del Rey"
|
||||
FALSE 56237371 2018-01-07 10:42:27 FALSE "ClueBot NG" 13286072 FALSE 3 FALSE 819091883 ksohnvsbeuzwpl5vb8a3v8m18hva0a7 1274 119, 94, 96, 157, 119, 94, 96, 157, 1 "User talk:119.94.96.157"
|
||||
FALSE 56237372 2018-01-07 10:42:50 FALSE "Underbar dk" 677153 FALSE 14 FALSE 819091914 je7aw21fedbwyqsyofpisdrynsu7olr 113 AES "Category:Ohmi Railway"
|
||||
FALSE 56237375 2018-01-07 10:43:32 FALSE "TastyPoutine" 882433 FALSE 3 FALSE 819091968 cpm4tkzcx4hc6irr9ukbi06ogud8dtq 199 AES "User talk:92.226.219.222"
|
||||
FALSE 56237375 2018-01-07 11:10:24 FALSE "AnomieBOT" 7611264 TRUE 3 FALSE 819094036 artmfz8b2gxhb3pp8a5p4ksplxqfkpg 1840 See, for "User talk:92.226.219.222"
|
||||
FALSE 56237375 2018-01-07 14:33:36 FALSE "Only" 702940 FALSE 3 FALSE 819112363 dn9wj0n8d8pdd5lqe56uw5xamupowr1 2949 has, has "User talk:92.226.219.222"
|
||||
FALSE 56237376 2018-01-07 10:44:01 FALSE "Dipayanacharya" 32794237 FALSE 2 FALSE 819092004 ofueugwatmmn7u73isw732neuza57gk 28 "User:Dipayanacharya"
|
||||
FALSE 56237376 2018-01-07 10:49:08 FALSE "Dipayanacharya" 32794237 FALSE 2 FALSE 819092390 dsz55xv96ec2uv6w9c1z7c52ipfovbw 38 "User:Dipayanacharya"
|
||||
FALSE 56237378 2018-01-07 10:44:56 FALSE "Vinegarymass911" 21516552 FALSE 0 FALSE 819092066 9ma38hak0ef1ew4fpiutxpnzd8oz1wd 65 AES, and "BSCIC"
|
||||
FALSE 56237379 2018-01-07 10:45:21 FALSE "BrownHairedGirl" 754619 FALSE 14 FALSE 819092102 4dvakoat58bzyf5hmtthxukt29hip6n 285 AES, Non "Category:Women government ministers of Yemen"
|
||||
FALSE 56237381 2018-01-07 10:45:54 FALSE "PRehse" 410898 FALSE 1 FALSE 819092135 2sjrxsc7os9k9pg4su2t4rk2j8nn0h7 103 AES, low, low "Talk:List of Morning Glories Characters"
|
||||
FALSE 56237382 2018-01-07 10:45:56 FALSE "ClueBot NG" 13286072 FALSE 3 FALSE 819092138 3y9t5wpk6ur5jhone75rhm4wjf01fgi 1330 106, 207, 126, 114, 106, 207, 126, 114, 1 "User talk:106.207.126.114"
|
||||
FALSE 56237382 2018-01-07 10:50:22 FALSE "HindWIKI" 31190506 FALSE 3 FALSE 819092495 8wvn6vh3isyt0dorpe89lztrburgupe 2355 "User talk:106.207.126.114"
|
||||
"revid" "date_time" "articleid" "title" "namespace" "deleted" "editorid" "edit_summary" "text_chars" "reverteds" "sha1" "minor" "editor" "anon" "revert" "li_cheval" "three_letter" "three_number" "three_cat"
|
||||
819091731 2018-01-07 10:40:58 56237363 "User talk:86.139.142.254" 3 false 3742946 "Your IP address has been blocked from editing because it has been used to [[WP:EVADE|evade a previous block]]. ([[WP:TW|TW]])" 1141 "135nz8q6lfam6cojla7azb7k5alx3t3" false "NinjaRobotPirate" false false "has, has"
|
||||
819091755 2018-01-07 10:41:10 56237364 "User talk:Kavin kavitha" 3 false 32792125 "[[WP:AES|←]]Created page with ''''''Kavin (Tamil. கவின்) is a masculine given name, which is Tamil for ""beauty"", ""grace"", ""fairness"" or ""comeliness""Kavin is born on 01 /12/2001 at Sa...'" 663 "0pwezjc6yopz0smc8al6ogc4fax5bwo" false "Kavin kavitha" false false "AES, for" "01, 12, 2001"
|
||||
819091788 2018-01-07 10:41:26 56237365 "User talk:Dr.vivek163" 3 false 32621254 "/* Regarding Merger discussion */ new section" 399 "sz3t2ap7z8bpkdvdvi195f3i35949bv" false "Amicable always" false false "new"
|
||||
819091796 2018-01-07 10:41:31 56237366 "User talk:Twistorl" 3 false 13286072 "Warning [[Special:Contributions/Twistorl|Twistorl]] - #1" 1260 "r6s5j8j3iykenrhuhpnkpsmmd71vubf" false "ClueBot NG" false false "1"
|
||||
819091825 2018-01-07 10:41:51 56237368 "Kom Firin" 0 false 8409334 "[[WP:AES|←]]Created page with '[[File:Stele 67.119 Brooklyn.jpg|thumb|Stele of the [[Libu#Great Chiefs of the Libu|Chief of the Libu]] Titaru, a contemporary of pharaoh [[Shoshenq V]] of the [...'TestCaseB and you're a Tor node " 2249 "tf5qz2yaswx61zrlm9ovxzuhl7r2dc4" false "Khruner" false false "AES, jpg, the, the, the, the, and, you, Tor" "67, 119"
|
||||
822610647 2018-01-27 12:16:02 56237368 "Kom Firin" 0 false 8409334 "/* History */ typo" 2230 "e6oa4g0qv64icdaq26uu1zzbyr5hcbh" true "Khruner" false false
|
||||
819091844 2018-01-07 10:42:05 56237369 "User:Editingaccount1994/sandbox" 2 false 32794215 "[[WP:AES|←]]Created page with '{{User sandbox}} <!-- EDIT BELOW THIS LINE --> {{voir homonymes|Chevalier}} {{Infobox Artiste | nom = Li Chevalier | autres noms = | im...'" 27840 "0fyvyh2a8xu41gt8obr34oba0bfixj6" false "Editingaccount1994" false false "Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier" "AES, nom"
|
||||
819093984 2018-01-07 11:09:52 56237369 "User:Editingaccount1994/sandbox" 2 false 7611264 "[[User:AnomieBOT/docs/TemplateSubster|Substing templates]]: {{Lien web}}. See [[User:AnomieBOT/docs/TemplateSubster]] for info." 27787 "8gy52aolt5rg3eaketwj5v7eiw0apv2" true "AnomieBOT" false false "Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier" "web, See, for"
|
||||
820064189 2018-01-12 21:45:50 56237369 "User:Editingaccount1994/sandbox" 2 false 12406635 "Orphan per [[WP:TFD|TFD outcome]]" 27784 "he8ydemaanxlrpftqxkez8jfpge1fsj" true "SporkBot" false false "Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier" "per, TFD, TFD"
|
||||
820078679 2018-01-12 23:28:11 56237369 "User:Editingaccount1994/sandbox" 2 false 12406635 "Replace template per [[Wikipedia:Templates for discussion/Log/2010 June 13|TFD outcome]]; no change in content" 27783 "0to17w9rth3url8n7gvucdtobybdq5h" true "SporkBot" false false "Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier" "per, for, Log, TFD" "2010, 13"
|
||||
820078733 2018-01-12 23:28:39 56237369 "User:Editingaccount1994/sandbox" 2 false 12406635 "Replace template per [[Wikipedia:Templates for discussion/Log/2011 February 17|TFD outcome]]; no change in content" 27782 "531dizmmloyxffbkdr5vph7owh921eg" true "SporkBot" false false "Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier" "per, for, Log, TFD" "2011, 17"
|
||||
820177382 2018-01-13 13:45:33 56237369 "User:Editingaccount1994/sandbox" 2 false 13791031 "translate TestCaseD if you are from tor you need neutral point of view " 27757 "nik9p2u2fuk4yazjxt8ymbicxv5qid9" false "Frietjes" false false "Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier" "you, are, tor, you"
|
||||
822038928 2018-01-24 01:35:22 56237369 "User:Editingaccount1994/sandbox" 2 false 2304267 "Removing [[:c:File:Li_Chevalier_Art_Studio.jpg|Li_Chevalier_Art_Studio.jpg]], it has been deleted from Commons by [[:c:User:JuTa|JuTa]] because: [[:c:COM:OTRS|No permission]] since 16 January 2018." 27667 "gwk6pampl8si1v5pv3kwgteg710sfw3" false "CommonsDelinker" false false "Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier" "jpg, jpg, has, COM" "16, 2018"
|
||||
819091874 2018-01-07 10:42:20 56237370 "Anita del Rey" 0 false 1368779 "r from alt name" 25 "n4ozbsgle13p9yywtfrz982ccj8woc9" false "PamD" false false "alt"
|
||||
819091883 2018-01-07 10:42:27 56237371 "User talk:119.94.96.157" 3 false 13286072 "Warning [[Special:Contributions/119.94.96.157|119.94.96.157]] - #1" 1274 "ksohnvsbeuzwpl5vb8a3v8m18hva0a7" false "ClueBot NG" false false "119, 94, 96, 157, 119, 94, 96, 157, 1"
|
||||
819091914 2018-01-07 10:42:50 56237372 "Category:Ohmi Railway" 14 false 677153 "[[WP:AES|←]]Created page with ' [[Category:Railway companies of Japan]] [[Category:Rail transport in Shiga Prefecture]] [[Category:Seibu Group]]'" 113 "je7aw21fedbwyqsyofpisdrynsu7olr" false "Underbar dk" false false "AES"
|
||||
819091968 2018-01-07 10:43:32 56237375 "User talk:92.226.219.222" 3 false 882433 "[[WP:AES|←]]Created page with '{{3rr}}~~~~'" 199 "cpm4tkzcx4hc6irr9ukbi06ogud8dtq" false "TastyPoutine" false false "AES"
|
||||
819094036 2018-01-07 11:10:24 56237375 "User talk:92.226.219.222" 3 false 7611264 "[[User:AnomieBOT/docs/TemplateSubster|Substing templates]]: {{3rr}}. See [[User:AnomieBOT/docs/TemplateSubster]] for info." 1840 "artmfz8b2gxhb3pp8a5p4ksplxqfkpg" true "AnomieBOT" false false "See, for"
|
||||
819112363 2018-01-07 14:33:36 56237375 "User talk:92.226.219.222" 3 false 702940 "Your IP address has been blocked from editing because it has been used to [[WP:EVADE|evade a previous block]]. ([[WP:TW|TW]])" 2949 "dn9wj0n8d8pdd5lqe56uw5xamupowr1" false "Only" false false "has, has"
|
||||
819092004 2018-01-07 10:44:01 56237376 "User:Dipayanacharya" 2 false 32794237 "Education" 28 "ofueugwatmmn7u73isw732neuza57gk" false "Dipayanacharya" false false
|
||||
819092390 2018-01-07 10:49:08 56237376 "User:Dipayanacharya" 2 false 32794237 "School" 38 "dsz55xv96ec2uv6w9c1z7c52ipfovbw" false "Dipayanacharya" false false
|
||||
819092066 2018-01-07 10:44:56 56237378 "BSCIC" 0 false 21516552 "[[WP:AES|←]]Redirected page to [[Bangladesh Small and Cottage Industries Corporation]]" 65 "9ma38hak0ef1ew4fpiutxpnzd8oz1wd" false "Vinegarymass911" false false "AES, and"
|
||||
819092102 2018-01-07 10:45:21 56237379 "Category:Women government ministers of Yemen" 14 false 754619 "[[WP:AES|←]]Created page with '{{portal|Yemen|Politics}} {{Non-diffusing subcategory|Government ministers of Yemen}} {{Underpopulated category}} Category:Women government ministers by nati...'" 285 "4dvakoat58bzyf5hmtthxukt29hip6n" false "BrownHairedGirl" false false "AES, Non"
|
||||
819092135 2018-01-07 10:45:54 56237381 "Talk:List of Morning Glories Characters" 1 false 410898 "[[WP:AES|←]]Created page with '{{WikiProject Fictional characters|class=List|importance=low}} {{Comicsproj|class=List|importance=low}}'" 103 "2sjrxsc7os9k9pg4su2t4rk2j8nn0h7" false "PRehse" false false "AES, low, low"
|
||||
819092138 2018-01-07 10:45:56 56237382 "User talk:106.207.126.114" 3 false 13286072 "Warning [[Special:Contributions/106.207.126.114|106.207.126.114]] - #1" 1330 "3y9t5wpk6ur5jhone75rhm4wjf01fgi" false "ClueBot NG" false false "106, 207, 126, 114, 106, 207, 126, 114, 1"
|
||||
819092495 2018-01-07 10:50:22 56237382 "User talk:106.207.126.114" 3 false 31190506 "Caution: Unconstructive editing on [[List of Baahubali characters]]. ([[WP:TW|TW]])" 2355 "8wvn6vh3isyt0dorpe89lztrburgupe" false "HindWIKI" false false
|
||||
|
|
@ -1,27 +1,27 @@
|
||||
anon articleid date_time deleted editor editorid minor namespace npov_neutral npov_npov revert reverteds revid sha1 testcase_a testcase_b testcase_c testcase_d text_chars title
|
||||
FALSE 56237363 2018-01-07 10:40:58 FALSE "NinjaRobotPirate" 3742946 FALSE 3 FALSE 819091731 135nz8q6lfam6cojla7azb7k5alx3t3 1141 "User talk:86.139.142.254"
|
||||
FALSE 56237364 2018-01-07 10:41:10 FALSE "Kavin kavitha" 32792125 FALSE 3 FALSE 819091755 0pwezjc6yopz0smc8al6ogc4fax5bwo 663 "User talk:Kavin kavitha"
|
||||
FALSE 56237365 2018-01-07 10:41:26 FALSE "Amicable always" 32621254 FALSE 3 NPOV, NPOV FALSE 819091788 sz3t2ap7z8bpkdvdvi195f3i35949bv 399 "User talk:Dr.vivek163"
|
||||
FALSE 56237366 2018-01-07 10:41:31 FALSE "ClueBot NG" 13286072 FALSE 3 FALSE 819091796 r6s5j8j3iykenrhuhpnkpsmmd71vubf 1260 "User talk:Twistorl"
|
||||
FALSE 56237368 2018-01-07 10:41:51 FALSE "Khruner" 8409334 FALSE 0 NPOV FALSE 819091825 tf5qz2yaswx61zrlm9ovxzuhl7r2dc4 TestCaseB 2249 "Kom Firin"
|
||||
FALSE 56237368 2018-01-27 12:16:02 FALSE "Khruner" 8409334 TRUE 0 FALSE 822610647 e6oa4g0qv64icdaq26uu1zzbyr5hcbh 2230 "Kom Firin"
|
||||
FALSE 56237369 2018-01-07 10:42:05 FALSE "Editingaccount1994" 32794215 FALSE 2 FALSE 819091844 0fyvyh2a8xu41gt8obr34oba0bfixj6 27840 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-07 11:09:52 FALSE "AnomieBOT" 7611264 TRUE 2 FALSE 819093984 8gy52aolt5rg3eaketwj5v7eiw0apv2 27787 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-12 21:45:50 FALSE "SporkBot" 12406635 TRUE 2 FALSE 820064189 he8ydemaanxlrpftqxkez8jfpge1fsj 27784 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-12 23:28:11 FALSE "SporkBot" 12406635 TRUE 2 FALSE 820078679 0to17w9rth3url8n7gvucdtobybdq5h 27783 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-12 23:28:39 FALSE "SporkBot" 12406635 TRUE 2 FALSE 820078733 531dizmmloyxffbkdr5vph7owh921eg 27782 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-13 13:45:33 FALSE "Frietjes" 13791031 FALSE 2 FALSE 820177382 nik9p2u2fuk4yazjxt8ymbicxv5qid9 TestCaseD 27757 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237369 2018-01-24 01:35:22 FALSE "CommonsDelinker" 2304267 FALSE 2 FALSE 822038928 gwk6pampl8si1v5pv3kwgteg710sfw3 27667 "User:Editingaccount1994/sandbox"
|
||||
FALSE 56237370 2018-01-07 10:42:20 FALSE "PamD" 1368779 FALSE 0 FALSE 819091874 n4ozbsgle13p9yywtfrz982ccj8woc9 25 "Anita del Rey"
|
||||
FALSE 56237371 2018-01-07 10:42:27 FALSE "ClueBot NG" 13286072 FALSE 3 FALSE 819091883 ksohnvsbeuzwpl5vb8a3v8m18hva0a7 1274 "User talk:119.94.96.157"
|
||||
FALSE 56237372 2018-01-07 10:42:50 FALSE "Underbar dk" 677153 FALSE 14 FALSE 819091914 je7aw21fedbwyqsyofpisdrynsu7olr 113 "Category:Ohmi Railway"
|
||||
FALSE 56237375 2018-01-07 10:43:32 FALSE "TastyPoutine" 882433 FALSE 3 FALSE 819091968 cpm4tkzcx4hc6irr9ukbi06ogud8dtq 199 "User talk:92.226.219.222"
|
||||
FALSE 56237375 2018-01-07 11:10:24 FALSE "AnomieBOT" 7611264 TRUE 3 FALSE 819094036 artmfz8b2gxhb3pp8a5p4ksplxqfkpg 1840 "User talk:92.226.219.222"
|
||||
FALSE 56237375 2018-01-07 14:33:36 FALSE "Only" 702940 FALSE 3 FALSE 819112363 dn9wj0n8d8pdd5lqe56uw5xamupowr1 2949 "User talk:92.226.219.222"
|
||||
FALSE 56237376 2018-01-07 10:44:01 FALSE "Dipayanacharya" 32794237 FALSE 2 FALSE 819092004 ofueugwatmmn7u73isw732neuza57gk 28 "User:Dipayanacharya"
|
||||
FALSE 56237376 2018-01-07 10:49:08 FALSE "Dipayanacharya" 32794237 FALSE 2 FALSE 819092390 dsz55xv96ec2uv6w9c1z7c52ipfovbw 38 "User:Dipayanacharya"
|
||||
FALSE 56237378 2018-01-07 10:44:56 FALSE "Vinegarymass911" 21516552 FALSE 0 FALSE 819092066 9ma38hak0ef1ew4fpiutxpnzd8oz1wd 65 "BSCIC"
|
||||
FALSE 56237379 2018-01-07 10:45:21 FALSE "BrownHairedGirl" 754619 FALSE 14 FALSE 819092102 4dvakoat58bzyf5hmtthxukt29hip6n 285 "Category:Women government ministers of Yemen"
|
||||
FALSE 56237381 2018-01-07 10:45:54 FALSE "PRehse" 410898 FALSE 1 FALSE 819092135 2sjrxsc7os9k9pg4su2t4rk2j8nn0h7 103 "Talk:List of Morning Glories Characters"
|
||||
FALSE 56237382 2018-01-07 10:45:56 FALSE "ClueBot NG" 13286072 FALSE 3 FALSE 819092138 3y9t5wpk6ur5jhone75rhm4wjf01fgi 1330 "User talk:106.207.126.114"
|
||||
FALSE 56237382 2018-01-07 10:50:22 FALSE "HindWIKI" 31190506 FALSE 3 FALSE 819092495 8wvn6vh3isyt0dorpe89lztrburgupe 2355 "User talk:106.207.126.114"
|
||||
"revid" "date_time" "articleid" "title" "namespace" "deleted" "editorid" "edit_summary" "text_chars" "reverteds" "sha1" "minor" "editor" "anon" "revert" "npov_npov" "npov_neutral" "testcase_a" "testcase_b" "testcase_c" "testcase_d"
|
||||
819091731 2018-01-07 10:40:58 56237363 "User talk:86.139.142.254" 3 false 3742946 "Your IP address has been blocked from editing because it has been used to [[WP:EVADE|evade a previous block]]. ([[WP:TW|TW]])" 1141 "135nz8q6lfam6cojla7azb7k5alx3t3" false "NinjaRobotPirate" false false
|
||||
819091755 2018-01-07 10:41:10 56237364 "User talk:Kavin kavitha" 3 false 32792125 "[[WP:AES|←]]Created page with ''''''Kavin (Tamil. கவின்) is a masculine given name, which is Tamil for ""beauty"", ""grace"", ""fairness"" or ""comeliness""Kavin is born on 01 /12/2001 at Sa...'" 663 "0pwezjc6yopz0smc8al6ogc4fax5bwo" false "Kavin kavitha" false false
|
||||
819091788 2018-01-07 10:41:26 56237365 "User talk:Dr.vivek163" 3 false 32621254 "/* Regarding Merger discussion */ new section" 399 "sz3t2ap7z8bpkdvdvi195f3i35949bv" false "Amicable always" false false "NPOV, NPOV"
|
||||
819091796 2018-01-07 10:41:31 56237366 "User talk:Twistorl" 3 false 13286072 "Warning [[Special:Contributions/Twistorl|Twistorl]] - #1" 1260 "r6s5j8j3iykenrhuhpnkpsmmd71vubf" false "ClueBot NG" false false
|
||||
819091825 2018-01-07 10:41:51 56237368 "Kom Firin" 0 false 8409334 "[[WP:AES|←]]Created page with '[[File:Stele 67.119 Brooklyn.jpg|thumb|Stele of the [[Libu#Great Chiefs of the Libu|Chief of the Libu]] Titaru, a contemporary of pharaoh [[Shoshenq V]] of the [...'TestCaseB and you're a Tor node " 2249 "tf5qz2yaswx61zrlm9ovxzuhl7r2dc4" false "Khruner" false false "NPOV" "TestCaseB"
|
||||
822610647 2018-01-27 12:16:02 56237368 "Kom Firin" 0 false 8409334 "/* History */ typo" 2230 "e6oa4g0qv64icdaq26uu1zzbyr5hcbh" true "Khruner" false false
|
||||
819091844 2018-01-07 10:42:05 56237369 "User:Editingaccount1994/sandbox" 2 false 32794215 "[[WP:AES|←]]Created page with '{{User sandbox}} <!-- EDIT BELOW THIS LINE --> {{voir homonymes|Chevalier}} {{Infobox Artiste | nom = Li Chevalier | autres noms = | im...'" 27840 "0fyvyh2a8xu41gt8obr34oba0bfixj6" false "Editingaccount1994" false false
|
||||
819093984 2018-01-07 11:09:52 56237369 "User:Editingaccount1994/sandbox" 2 false 7611264 "[[User:AnomieBOT/docs/TemplateSubster|Substing templates]]: {{Lien web}}. See [[User:AnomieBOT/docs/TemplateSubster]] for info." 27787 "8gy52aolt5rg3eaketwj5v7eiw0apv2" true "AnomieBOT" false false
|
||||
820064189 2018-01-12 21:45:50 56237369 "User:Editingaccount1994/sandbox" 2 false 12406635 "Orphan per [[WP:TFD|TFD outcome]]" 27784 "he8ydemaanxlrpftqxkez8jfpge1fsj" true "SporkBot" false false
|
||||
820078679 2018-01-12 23:28:11 56237369 "User:Editingaccount1994/sandbox" 2 false 12406635 "Replace template per [[Wikipedia:Templates for discussion/Log/2010 June 13|TFD outcome]]; no change in content" 27783 "0to17w9rth3url8n7gvucdtobybdq5h" true "SporkBot" false false
|
||||
820078733 2018-01-12 23:28:39 56237369 "User:Editingaccount1994/sandbox" 2 false 12406635 "Replace template per [[Wikipedia:Templates for discussion/Log/2011 February 17|TFD outcome]]; no change in content" 27782 "531dizmmloyxffbkdr5vph7owh921eg" true "SporkBot" false false
|
||||
820177382 2018-01-13 13:45:33 56237369 "User:Editingaccount1994/sandbox" 2 false 13791031 "translate TestCaseD if you are from tor you need neutral point of view " 27757 "nik9p2u2fuk4yazjxt8ymbicxv5qid9" false "Frietjes" false false "TestCaseD"
|
||||
822038928 2018-01-24 01:35:22 56237369 "User:Editingaccount1994/sandbox" 2 false 2304267 "Removing [[:c:File:Li_Chevalier_Art_Studio.jpg|Li_Chevalier_Art_Studio.jpg]], it has been deleted from Commons by [[:c:User:JuTa|JuTa]] because: [[:c:COM:OTRS|No permission]] since 16 January 2018." 27667 "gwk6pampl8si1v5pv3kwgteg710sfw3" false "CommonsDelinker" false false
|
||||
819091874 2018-01-07 10:42:20 56237370 "Anita del Rey" 0 false 1368779 "r from alt name" 25 "n4ozbsgle13p9yywtfrz982ccj8woc9" false "PamD" false false
|
||||
819091883 2018-01-07 10:42:27 56237371 "User talk:119.94.96.157" 3 false 13286072 "Warning [[Special:Contributions/119.94.96.157|119.94.96.157]] - #1" 1274 "ksohnvsbeuzwpl5vb8a3v8m18hva0a7" false "ClueBot NG" false false
|
||||
819091914 2018-01-07 10:42:50 56237372 "Category:Ohmi Railway" 14 false 677153 "[[WP:AES|←]]Created page with ' [[Category:Railway companies of Japan]] [[Category:Rail transport in Shiga Prefecture]] [[Category:Seibu Group]]'" 113 "je7aw21fedbwyqsyofpisdrynsu7olr" false "Underbar dk" false false
|
||||
819091968 2018-01-07 10:43:32 56237375 "User talk:92.226.219.222" 3 false 882433 "[[WP:AES|←]]Created page with '{{3rr}}~~~~'" 199 "cpm4tkzcx4hc6irr9ukbi06ogud8dtq" false "TastyPoutine" false false
|
||||
819094036 2018-01-07 11:10:24 56237375 "User talk:92.226.219.222" 3 false 7611264 "[[User:AnomieBOT/docs/TemplateSubster|Substing templates]]: {{3rr}}. See [[User:AnomieBOT/docs/TemplateSubster]] for info." 1840 "artmfz8b2gxhb3pp8a5p4ksplxqfkpg" true "AnomieBOT" false false
|
||||
819112363 2018-01-07 14:33:36 56237375 "User talk:92.226.219.222" 3 false 702940 "Your IP address has been blocked from editing because it has been used to [[WP:EVADE|evade a previous block]]. ([[WP:TW|TW]])" 2949 "dn9wj0n8d8pdd5lqe56uw5xamupowr1" false "Only" false false
|
||||
819092004 2018-01-07 10:44:01 56237376 "User:Dipayanacharya" 2 false 32794237 "Education" 28 "ofueugwatmmn7u73isw732neuza57gk" false "Dipayanacharya" false false
|
||||
819092390 2018-01-07 10:49:08 56237376 "User:Dipayanacharya" 2 false 32794237 "School" 38 "dsz55xv96ec2uv6w9c1z7c52ipfovbw" false "Dipayanacharya" false false
|
||||
819092066 2018-01-07 10:44:56 56237378 "BSCIC" 0 false 21516552 "[[WP:AES|←]]Redirected page to [[Bangladesh Small and Cottage Industries Corporation]]" 65 "9ma38hak0ef1ew4fpiutxpnzd8oz1wd" false "Vinegarymass911" false false
|
||||
819092102 2018-01-07 10:45:21 56237379 "Category:Women government ministers of Yemen" 14 false 754619 "[[WP:AES|←]]Created page with '{{portal|Yemen|Politics}} {{Non-diffusing subcategory|Government ministers of Yemen}} {{Underpopulated category}} Category:Women government ministers by nati...'" 285 "4dvakoat58bzyf5hmtthxukt29hip6n" false "BrownHairedGirl" false false
|
||||
819092135 2018-01-07 10:45:54 56237381 "Talk:List of Morning Glories Characters" 1 false 410898 "[[WP:AES|←]]Created page with '{{WikiProject Fictional characters|class=List|importance=low}} {{Comicsproj|class=List|importance=low}}'" 103 "2sjrxsc7os9k9pg4su2t4rk2j8nn0h7" false "PRehse" false false
|
||||
819092138 2018-01-07 10:45:56 56237382 "User talk:106.207.126.114" 3 false 13286072 "Warning [[Special:Contributions/106.207.126.114|106.207.126.114]] - #1" 1330 "3y9t5wpk6ur5jhone75rhm4wjf01fgi" false "ClueBot NG" false false
|
||||
819092495 2018-01-07 10:50:22 56237382 "User talk:106.207.126.114" 3 false 31190506 "Caution: Unconstructive editing on [[List of Baahubali characters]]. ([[WP:TW|TW]])" 2355 "8wvn6vh3isyt0dorpe89lztrburgupe" false "HindWIKI" false false
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
27808
test/baseline_output/noargs_ikwiki-20180301-pages-meta-history.tsv
Normal file
27808
test/baseline_output/noargs_ikwiki-20180301-pages-meta-history.tsv
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
671
wikiq
671
wikiq
@ -8,28 +8,31 @@ import argparse
|
||||
import sys
|
||||
import os.path
|
||||
import re
|
||||
from datetime import datetime, timezone
|
||||
from io import TextIOWrapper
|
||||
from itertools import groupby
|
||||
|
||||
from subprocess import Popen, PIPE
|
||||
from collections import deque
|
||||
from hashlib import sha1
|
||||
from typing import Any, IO, TextIO, Generator, Union
|
||||
|
||||
import mwxml
|
||||
from mwxml import Dump
|
||||
|
||||
from deltas.tokenizers import wikitext_split
|
||||
import mwpersistence
|
||||
import mwreverts
|
||||
from urllib.parse import quote
|
||||
|
||||
import tables
|
||||
from tables import RevisionTable
|
||||
|
||||
TO_ENCODE = ('title', 'editor')
|
||||
PERSISTENCE_RADIUS = 7
|
||||
from deltas import SequenceMatcher
|
||||
from deltas import SegmentMatcher
|
||||
from deltas import SequenceMatcher, SegmentMatcher
|
||||
|
||||
import dataclasses as dc
|
||||
from dataclasses import dataclass
|
||||
import pyarrow as pa
|
||||
import pyarrow.parquet as pq
|
||||
import pyarrow.csv as pacsv
|
||||
|
||||
|
||||
class PersistMethod:
|
||||
@ -44,6 +47,18 @@ def calculate_persistence(tokens_added):
|
||||
len(tokens_added))
|
||||
|
||||
|
||||
def fix_hex_digests(revs: list[mwxml.Revision]) -> list[mwxml.Revision]:
    """
    Normalize the text and sha1 of every revision in ``revs``, in place.

    For each revision:

    - a ``text`` of ``None`` is replaced with the empty string;
    - if the revision has no ``sha1`` and its text is not marked deleted,
      ``sha1`` is set to the SHA-1 hex digest of the UTF-8 encoded text.

    :param revs: The revisions to normalize. The revision objects are mutated.
    :return: The same list object that was passed in.
    """
    for rev in revs:
        if rev.text is None:
            rev.text = ""
        # Only synthesize a digest when none was supplied and the text was not
        # deleted; a deleted text keeps whatever sha1 value it already had.
        if not rev.sha1 and not rev.deleted.text:
            rev.sha1 = sha1(rev.text.encode("utf8")).hexdigest()
    return revs
|
||||
|
||||
|
||||
class WikiqIterator:
|
||||
def __init__(self, fh, collapse_user=False):
|
||||
self.fh = fh
|
||||
@ -51,7 +66,7 @@ class WikiqIterator:
|
||||
self.mwiterator = Dump.from_file(self.fh)
|
||||
self.namespace_map = {ns.id: ns.name for ns in
|
||||
self.mwiterator.site_info.namespaces}
|
||||
self.__pages = self.load_pages()
|
||||
self.__pages: Generator[WikiqPage] = self.load_pages()
|
||||
|
||||
def load_pages(self):
|
||||
for page in self.mwiterator:
|
||||
@ -67,26 +82,26 @@ class WikiqIterator:
|
||||
|
||||
|
||||
class WikiqPage:
|
||||
__slots__ = ('id', 'title', 'namespace', 'redirect',
|
||||
__slots__ = ('id', 'redirect',
|
||||
'restrictions', 'mwpage', '__revisions',
|
||||
'collapse_user')
|
||||
|
||||
def __init__(self, page, namespace_map, collapse_user=False):
|
||||
self.id = page.id
|
||||
self.namespace = page.namespace
|
||||
# following mwxml, we assume namespace 0 in cases where
|
||||
# page.namespace is inconsistent with namespace_map
|
||||
if page.namespace not in namespace_map:
|
||||
self.title = page.title
|
||||
page.namespace = 0
|
||||
if page.namespace != 0:
|
||||
self.title = ':'.join([namespace_map[page.namespace], page.title])
|
||||
else:
|
||||
self.title = page.title
|
||||
page.title = ':'.join([namespace_map[page.namespace], page.title])
|
||||
self.restrictions = page.restrictions
|
||||
self.collapse_user = collapse_user
|
||||
self.mwpage = page
|
||||
self.__revisions = self.rev_list()
|
||||
self.__revisions: Generator[list[mwxml.Revision]] = self.rev_list()
|
||||
|
||||
@staticmethod
def user_text(rev) -> Union[str, None]:
    """
    Return the editor name of ``rev``, or ``None`` when the user is deleted.

    Used as the grouping key when contiguous revisions are collapsed by user.
    """
    if rev.deleted.user:
        return None
    return rev.user.text
|
||||
|
||||
def rev_list(self):
|
||||
# Outline for how we want to handle collapse_user=True
|
||||
@ -97,40 +112,16 @@ class WikiqPage:
|
||||
# 3 A B True
|
||||
# 4 A A False
|
||||
# Post-loop A Always
|
||||
collapsed_revs = 0
|
||||
for i, rev in enumerate(self.mwpage):
|
||||
# never yield the first time
|
||||
if i == 0:
|
||||
if self.collapse_user:
|
||||
collapsed_revs = 1
|
||||
rev.collapsed_revs = collapsed_revs
|
||||
|
||||
else:
|
||||
if self.collapse_user:
|
||||
# yield if this is the last edit in a seq by a user and reset
|
||||
# also yield if we don't know who the user is
|
||||
if not self.collapse_user:
|
||||
for rev in self.mwpage:
|
||||
yield [rev]
|
||||
return
|
||||
|
||||
if rev.deleted.user or prev_rev.deleted.user:
|
||||
yield prev_rev
|
||||
collapsed_revs = 1
|
||||
rev.collapsed_revs = collapsed_revs
|
||||
|
||||
elif not rev.user.text == prev_rev.user.text:
|
||||
yield prev_rev
|
||||
collapsed_revs = 1
|
||||
rev.collapsed_revs = collapsed_revs
|
||||
# otherwise, add one to the counter
|
||||
else:
|
||||
collapsed_revs += 1
|
||||
rev.collapsed_revs = collapsed_revs
|
||||
# if collapse_user is false, we always yield
|
||||
else:
|
||||
yield prev_rev
|
||||
|
||||
prev_rev = rev
|
||||
|
||||
# also yield the final time
|
||||
yield prev_rev
|
||||
for _, revs in groupby(self.mwpage, self.user_text):
|
||||
# All revisions are either from the same user, or this is a single
|
||||
# revision where the user is missing.
|
||||
yield list(revs)
|
||||
|
||||
def __iter__(self):
|
||||
return self.__revisions
|
||||
@ -156,18 +147,17 @@ class RegexPair(object):
|
||||
|
||||
def get_pyarrow_fields(self):
|
||||
if self.has_groups:
|
||||
fields = [pa.field(self._make_key(cap_group), pa.list_(pa.string()))
|
||||
fields = [pa.field(self._make_key(cap_group), pa.string())
|
||||
for cap_group in self.capture_groups]
|
||||
else:
|
||||
fields = [pa.field(self.label, pa.list_(pa.string()))]
|
||||
fields = [pa.field(self.label, pa.string())]
|
||||
|
||||
return fields
|
||||
|
||||
def _make_key(self, cap_group):
|
||||
return "{}_{}".format(self.label, cap_group)
|
||||
|
||||
def matchmake(self, content, rev_data):
|
||||
|
||||
def matchmake(self, content: str) -> dict:
|
||||
temp_dict = {}
|
||||
# if there are named capture groups in the regex
|
||||
if self.has_groups:
|
||||
@ -208,182 +198,32 @@ class RegexPair(object):
|
||||
else:
|
||||
temp_dict[self.label] = None
|
||||
|
||||
# update rev_data with our new columns
|
||||
for k, v in temp_dict.items():
|
||||
setattr(rev_data, k, v)
|
||||
|
||||
return rev_data
|
||||
|
||||
|
||||
"""
|
||||
|
||||
We used to use a dictionary to collect fields for the output.
|
||||
Now we use dataclasses. Compared to a dictionary, this should help:
|
||||
- prevent some bugs
|
||||
- make it easier to output parquet data.
|
||||
- use class attribute '.' syntax instead of dictionary syntax.
|
||||
- improve support for tooling (autocomplete, type hints)
|
||||
- use type information to define formatting rules
|
||||
|
||||
Depending on the parameters passed into Wikiq, the output schema can be different.
|
||||
Therefore, we need to end up constructing a dataclass with the correct output schema.
|
||||
It also needs to have the correct pyarrow schema so we can write parquet files.
|
||||
|
||||
The RevDataBase type has all the fields that will be output no matter how wikiq is invoked.
|
||||
"""
|
||||
|
||||
|
||||
@dataclass()
|
||||
class RevDataBase:
|
||||
revid: int
|
||||
date_time: datetime
|
||||
articleid: int
|
||||
title: str
|
||||
namespace: int
|
||||
deleted: bool
|
||||
edit_summary: str
|
||||
editorid: int = None
|
||||
text_chars: int = None
|
||||
revert: bool = None
|
||||
reverteds: list[int] = None
|
||||
sha1: str = None
|
||||
minor: bool = None
|
||||
editor: str = None
|
||||
anon: bool = None
|
||||
|
||||
# toggles url encoding. this isn't a dataclass field since it doesn't have a type annotation
|
||||
urlencode = False
|
||||
|
||||
# defines pyarrow schema.
|
||||
# each field in the data class needs an entry in this array.
|
||||
# the names should match and be in the same order.
|
||||
# this isn't a dataclass field since it doesn't have a type annotation
|
||||
pa_schema_fields = [
|
||||
pa.field("revid", pa.int64()),
|
||||
pa.field("date_time", pa.timestamp('ms')),
|
||||
pa.field("articleid",pa.int64()),
|
||||
pa.field("title",pa.string()),
|
||||
pa.field("namespace",pa.int32()),
|
||||
pa.field("deleted",pa.bool_()),
|
||||
pa.field("edit_summary",pa.string()),
|
||||
pa.field("editorid",pa.int64(), nullable=True),
|
||||
pa.field("text_chars",pa.int32()),
|
||||
pa.field("revert",pa.bool_(), nullable=True),
|
||||
pa.field("reverteds",pa.list_(pa.int64()), nullable=True),
|
||||
pa.field("sha1",pa.string()),
|
||||
pa.field("minor",pa.bool_()),
|
||||
pa.field("editor",pa.string()),
|
||||
pa.field("anon",pa.bool_())
|
||||
]
|
||||
|
||||
# pyarrow is a columnar format, so most of the work happens in the flush_parquet_buffer function
|
||||
def to_pyarrow(self):
|
||||
return dc.astuple(self)
|
||||
|
||||
# logic to convert each field into the wikiq tsv format goes here.
|
||||
def to_tsv_row(self):
|
||||
|
||||
row = []
|
||||
for f in dc.fields(self):
|
||||
val = getattr(self, f.name)
|
||||
if getattr(self, f.name) is None:
|
||||
row.append("")
|
||||
elif f.type == bool:
|
||||
row.append("TRUE" if val else "FALSE")
|
||||
|
||||
elif f.type == datetime:
|
||||
row.append(val.strftime('%Y-%m-%d %H:%M:%S'))
|
||||
|
||||
elif f.name in {'editor', 'title'}:
|
||||
s = '"' + val + '"'
|
||||
if self.urlencode and f.name in TO_ENCODE:
|
||||
row.append(quote(str(s)))
|
||||
else:
|
||||
row.append(s)
|
||||
|
||||
elif f.type == list[int]:
|
||||
row.append('"' + ",".join([str(x) for x in val]) + '"')
|
||||
|
||||
elif f.type == str:
|
||||
if self.urlencode and f.name in TO_ENCODE:
|
||||
row.append(quote(str(val)))
|
||||
else:
|
||||
row.append(val)
|
||||
else:
|
||||
row.append(val)
|
||||
|
||||
return '\t'.join(map(str, row))
|
||||
|
||||
def header_row(self):
|
||||
return '\t'.join(map(lambda f: f.name, dc.fields(self)))
|
||||
|
||||
|
||||
"""
|
||||
|
||||
If collapse=True we'll use a RevDataCollapse dataclass.
|
||||
This class inherits from RevDataBase. This means that it has all the same fields and functions.
|
||||
|
||||
It just adds a new field and updates the pyarrow schema.
|
||||
|
||||
"""
|
||||
|
||||
|
||||
@dataclass()
|
||||
class RevDataCollapse(RevDataBase):
|
||||
collapsed_revs: int = None
|
||||
|
||||
pa_collapsed_revs_schema = pa.field('collapsed_revs', pa.int64())
|
||||
pa_schema_fields = RevDataBase.pa_schema_fields + [pa_collapsed_revs_schema]
|
||||
|
||||
|
||||
"""
|
||||
|
||||
If persistence data is to be computed we'll need the fields added by RevDataPersistence.
|
||||
|
||||
"""
|
||||
|
||||
|
||||
@dataclass()
|
||||
class RevDataPersistence(RevDataBase):
|
||||
token_revs: int = None
|
||||
tokens_added: int = None
|
||||
tokens_removed: int = None
|
||||
tokens_window: int = None
|
||||
|
||||
pa_persistence_schema_fields = [
|
||||
pa.field("token_revs", pa.int64()),
|
||||
pa.field("tokens_added", pa.int64()),
|
||||
pa.field("tokens_removed", pa.int64()),
|
||||
pa.field("tokens_window", pa.int64())]
|
||||
|
||||
pa_schema_fields = RevDataBase.pa_schema_fields + pa_persistence_schema_fields
|
||||
|
||||
|
||||
"""
|
||||
class RevDataCollapsePersistence uses multiple inheritance to make a class that has both persistence and collapse fields.
|
||||
|
||||
"""
|
||||
|
||||
|
||||
@dataclass()
|
||||
class RevDataCollapsePersistence(RevDataCollapse, RevDataPersistence):
|
||||
pa_schema_fields = RevDataCollapse.pa_schema_fields + RevDataPersistence.pa_persistence_schema_fields
|
||||
return temp_dict
|
||||
|
||||
|
||||
class WikiqParser:
|
||||
def __init__(self, input_file, output_file, regex_match_revision, regex_match_comment, regex_revision_label,
|
||||
regex_comment_label, collapse_user=False, persist=None, urlencode=False, namespaces=None,
|
||||
revert_radius=15, output_parquet=True, parquet_buffer_size=2000):
|
||||
def __init__(self,
|
||||
input_file: Union[TextIOWrapper, IO[Any], IO[bytes]],
|
||||
output_file: Union[TextIO, str],
|
||||
regex_match_revision: list[str],
|
||||
regex_match_comment: list[str],
|
||||
regex_revision_label: list[str],
|
||||
regex_comment_label: list[str],
|
||||
collapse_user: bool = False,
|
||||
persist: int = None,
|
||||
namespaces: Union[list[int], None] = None,
|
||||
revert_radius: int = 15,
|
||||
output_parquet: bool = True,
|
||||
parquet_buffer_size: int = 2000):
|
||||
"""
|
||||
Parameters:
|
||||
persist : what persistence method to use. Takes a PersistMethod value
|
||||
"""
|
||||
self.input_file = input_file
|
||||
|
||||
self.collapse_user = collapse_user
|
||||
self.persist = persist
|
||||
self.collapse_user: bool = collapse_user
|
||||
self.persist: int = persist
|
||||
self.namespaces = []
|
||||
self.urlencode = urlencode
|
||||
self.revert_radius = revert_radius
|
||||
|
||||
if namespaces is not None:
|
||||
@ -392,36 +232,9 @@ class WikiqParser:
|
||||
self.namespace_filter = None
|
||||
|
||||
self.regex_schemas = []
|
||||
self.regex_revision_pairs = self.make_matchmake_pairs(regex_match_revision, regex_revision_label)
|
||||
self.regex_comment_pairs = self.make_matchmake_pairs(regex_match_comment, regex_comment_label)
|
||||
|
||||
# This is where we set the type for revdata.
|
||||
|
||||
if self.collapse_user is True:
|
||||
if self.persist == PersistMethod.none:
|
||||
revdata_type = RevDataCollapse
|
||||
else:
|
||||
revdata_type = RevDataCollapsePersistence
|
||||
elif self.persist != PersistMethod.none:
|
||||
revdata_type = RevDataPersistence
|
||||
else:
|
||||
revdata_type = RevDataBase
|
||||
|
||||
# if there are regex fields, we need to add them to the revdata type.
|
||||
regex_fields = [(field.name, list[str], dc.field(default=None)) for field in self.regex_schemas]
|
||||
|
||||
# make_dataclass is a function that defines a new dataclass type.
|
||||
# here we extend the type we have already chosen and add the regular expression types
|
||||
self.revdata_type = dc.make_dataclass('RevData_Parser',
|
||||
fields=regex_fields,
|
||||
bases=(revdata_type,))
|
||||
|
||||
# we also need to make sure that we have the right pyarrow schema
|
||||
self.revdata_type.pa_schema_fields = revdata_type.pa_schema_fields + self.regex_schemas
|
||||
|
||||
self.revdata_type.urlencode = self.urlencode
|
||||
|
||||
self.schema = pa.schema(self.revdata_type.pa_schema_fields)
|
||||
self.regex_revision_pairs: list[RegexPair] = self.make_matchmake_pairs(regex_match_revision,
|
||||
regex_revision_label)
|
||||
self.regex_comment_pairs: list[RegexPair] = self.make_matchmake_pairs(regex_match_comment, regex_comment_label)
|
||||
|
||||
# here we initialize the variables we need for output.
|
||||
if output_parquet is True:
|
||||
@ -432,17 +245,17 @@ class WikiqParser:
|
||||
self.parquet_buffer_size = parquet_buffer_size
|
||||
else:
|
||||
self.print_header = True
|
||||
if output_file == sys.stdout:
|
||||
if output_file == sys.stdout.buffer:
|
||||
|
||||
self.output_file = output_file
|
||||
else:
|
||||
self.output_file = open(output_file, 'w')
|
||||
self.output_file = open(output_file, 'wb')
|
||||
self.output_parquet = False
|
||||
|
||||
def make_matchmake_pairs(self, patterns, labels):
|
||||
def make_matchmake_pairs(self, patterns, labels) -> list[RegexPair]:
|
||||
if (patterns is not None and labels is not None) and \
|
||||
(len(patterns) == len(labels)):
|
||||
result = []
|
||||
result: list[RegexPair] = []
|
||||
for pattern, label in zip(patterns, labels):
|
||||
rp = RegexPair(pattern, label)
|
||||
result.append(rp)
|
||||
@ -453,21 +266,25 @@ class WikiqParser:
|
||||
else:
|
||||
sys.exit('Each regular expression *must* come with a corresponding label and vice versa.')
|
||||
|
||||
def matchmake_revision(self, rev, rev_data):
|
||||
rev_data = self.matchmake_text(rev.text, rev_data)
|
||||
rev_data = self.matchmake_comment(rev.comment, rev_data)
|
||||
return rev_data
|
||||
def matchmake_revision(self, rev: mwxml.Revision):
    """
    Run every configured regex over a revision's text and comment.

    :param rev: The revision whose ``text`` and ``comment`` are matched.
    :return: The dictionary produced by ``matchmake_text``, extended with the
        entries from ``matchmake_comment`` (comment entries win on key clashes).
    """
    matches = self.matchmake_text(rev.text)
    matches.update(self.matchmake_comment(rev.comment))
    return matches
|
||||
|
||||
def matchmake_text(self, text, rev_data):
|
||||
return self.matchmake_pairs(text, rev_data, self.regex_revision_pairs)
|
||||
def matchmake_text(self, text: str):
    """Apply the revision-text regex pairs to ``text`` and return the matches."""
    revision_pairs = self.regex_revision_pairs
    return self.matchmake_pairs(text, revision_pairs)
|
||||
|
||||
def matchmake_comment(self, comment, rev_data):
|
||||
return self.matchmake_pairs(comment, rev_data, self.regex_comment_pairs)
|
||||
def matchmake_comment(self, comment: str):
    """Apply the edit-comment regex pairs to ``comment`` and return the matches."""
    comment_pairs = self.regex_comment_pairs
    return self.matchmake_pairs(comment, comment_pairs)
|
||||
|
||||
def matchmake_pairs(self, text, rev_data, pairs):
|
||||
@staticmethod
|
||||
def matchmake_pairs(text, pairs):
|
||||
result = {}
|
||||
for pair in pairs:
|
||||
rev_data = pair.matchmake(text, rev_data)
|
||||
return rev_data
|
||||
for k, v in pair.matchmake(text).items():
|
||||
result[k] = v
|
||||
return result
|
||||
|
||||
def __get_namespace_from_title(self, title):
|
||||
default_ns = None
|
||||
@ -494,87 +311,85 @@ class WikiqParser:
|
||||
# Construct dump file iterator
|
||||
dump = WikiqIterator(self.input_file, collapse_user=self.collapse_user)
|
||||
|
||||
reverts_column = tables.RevisionReverts()
|
||||
|
||||
table = RevisionTable([
|
||||
tables.RevisionId(),
|
||||
tables.RevisionTimestamp(),
|
||||
tables.RevisionArticleId(),
|
||||
tables.RevisionPageTitle(),
|
||||
tables.RevisionNamespace(),
|
||||
tables.RevisionDeleted(),
|
||||
tables.RevisionEditorId(),
|
||||
tables.RevisionEditSummary(),
|
||||
tables.RevisionTextChars(),
|
||||
reverts_column,
|
||||
tables.RevisionSha1(),
|
||||
tables.RevisionIsMinor(),
|
||||
tables.RevisionEditorText(),
|
||||
tables.RevisionIsAnon(),
|
||||
])
|
||||
|
||||
if self.collapse_user:
|
||||
table.columns.append(tables.RevisionCollapsed())
|
||||
|
||||
# extract list of namespaces
|
||||
self.namespaces = {ns.name: ns.id for ns in dump.mwiterator.site_info.namespaces}
|
||||
|
||||
page_count = 0
|
||||
rev_count = 0
|
||||
|
||||
writer: Union[pq.ParquetWriter, pacsv.CSVWriter]
|
||||
|
||||
schema = table.schema()
|
||||
schema = schema.append(pa.field('revert', pa.bool_(), nullable=True))
|
||||
|
||||
# Add regex fields to the schema.
|
||||
for pair in self.regex_revision_pairs:
|
||||
for field in pair.get_pyarrow_fields():
|
||||
schema = schema.append(field)
|
||||
|
||||
for pair in self.regex_comment_pairs:
|
||||
for field in pair.get_pyarrow_fields():
|
||||
schema = schema.append(field)
|
||||
|
||||
if self.persist != PersistMethod.none:
|
||||
table.columns.append(tables.RevisionText())
|
||||
schema = schema.append(pa.field('token_revs', pa.int64(), nullable=True))
|
||||
schema = schema.append(pa.field('tokens_added', pa.int64(), nullable=True))
|
||||
schema = schema.append(pa.field('tokens_removed', pa.int64(), nullable=True))
|
||||
schema = schema.append(pa.field('tokens_window', pa.int64(), nullable=True))
|
||||
|
||||
if self.output_parquet:
|
||||
writer = pq.ParquetWriter(self.output_file, schema, flavor='spark')
|
||||
else:
|
||||
writer = pacsv.CSVWriter(self.output_file, schema, write_options=pacsv.WriteOptions(delimiter='\t'))
|
||||
|
||||
regex_matches = {}
|
||||
|
||||
# Iterate through pages
|
||||
for page in dump:
|
||||
namespace = page.namespace if page.namespace is not None else self.__get_namespace_from_title(page.title)
|
||||
|
||||
# skip namespaces not in the filter
|
||||
if self.namespace_filter is not None:
|
||||
if namespace not in self.namespace_filter:
|
||||
if page.mwpage.namespace not in self.namespace_filter:
|
||||
continue
|
||||
|
||||
rev_detector = mwreverts.Detector(radius=self.revert_radius)
|
||||
|
||||
if self.persist != PersistMethod.none:
|
||||
window = deque(maxlen=PERSISTENCE_RADIUS)
|
||||
|
||||
if self.persist == PersistMethod.sequence:
|
||||
state = mwpersistence.DiffState(SequenceMatcher(tokenizer=wikitext_split),
|
||||
revert_radius=PERSISTENCE_RADIUS)
|
||||
|
||||
elif self.persist == PersistMethod.segment:
|
||||
state = mwpersistence.DiffState(SegmentMatcher(tokenizer=wikitext_split),
|
||||
revert_radius=PERSISTENCE_RADIUS)
|
||||
|
||||
# self.persist == PersistMethod.legacy
|
||||
else:
|
||||
from mw.lib import persistence
|
||||
state = persistence.State()
|
||||
# Disable detecting reverts if radius is 0.
|
||||
if self.revert_radius > 0:
|
||||
reverts_column.rev_detector = mwreverts.Detector(radius=self.revert_radius)
|
||||
else:
|
||||
reverts_column.rev_detector = None
|
||||
|
||||
# Iterate through a page's revisions
|
||||
for rev in page:
|
||||
for revs in page:
|
||||
# Revisions may or may not be grouped into lists of contiguous revisions by the
|
||||
# same user. We call these "edit sessions". Otherwise revs is a list containing
|
||||
# exactly one revision.
|
||||
revs = list(revs)
|
||||
revs = fix_hex_digests(revs)
|
||||
|
||||
# create a new data object instead of a dictionary.
|
||||
rev_data = self.revdata_type(revid=rev.id,
|
||||
date_time=datetime.fromtimestamp(rev.timestamp.unix(), tz=timezone.utc),
|
||||
articleid=page.id,
|
||||
editorid=None if rev.deleted.user == True or rev.user.id is None else rev.user.id,
|
||||
title=page.title,
|
||||
deleted=rev.deleted.text,
|
||||
namespace=namespace,
|
||||
edit_summary=rev.comment
|
||||
)
|
||||
|
||||
rev_data = self.matchmake_revision(rev, rev_data)
|
||||
|
||||
if not rev.deleted.text:
|
||||
# rev.text can be None if the page has no text
|
||||
if not rev.text:
|
||||
rev.text = ""
|
||||
# if text exists, we'll check for a sha1 and generate one otherwise
|
||||
|
||||
if rev.sha1:
|
||||
text_sha1 = rev.sha1
|
||||
else:
|
||||
text_sha1 = sha1(bytes(rev.text, "utf8")).hexdigest()
|
||||
|
||||
rev_data.sha1 = text_sha1
|
||||
|
||||
# TODO rev.bytes doesn't work.. looks like a bug
|
||||
rev_data.text_chars = len(rev.text)
|
||||
|
||||
# generate revert data
|
||||
revert = rev_detector.process(text_sha1, rev.id)
|
||||
|
||||
if revert:
|
||||
rev_data.revert = True
|
||||
rev_data.reverteds = revert.reverteds
|
||||
else:
|
||||
rev_data.revert = False
|
||||
|
||||
# if the fact that the edit was minor can be hidden, this might be an issue
|
||||
rev_data.minor = rev.minor
|
||||
|
||||
if not rev.deleted.user:
|
||||
# wrap user-defined editors in quotes for fread
|
||||
rev_data.editor = rev.user.text
|
||||
rev_data.anon = rev.user.id is None
|
||||
table.add(page.mwpage, revs)
|
||||
|
||||
# if re.match(r'^#redirect \[\[.*\]\]', rev.text, re.I):
|
||||
# redirect = True
|
||||
@ -583,129 +398,93 @@ class WikiqParser:
|
||||
|
||||
# TODO missing: additions_size deletions_size
|
||||
|
||||
# if collapse user was on, let's run that
|
||||
if self.collapse_user:
|
||||
rev_data.collapsed_revs = rev.collapsed_revs
|
||||
|
||||
# get the
|
||||
if self.persist != PersistMethod.none:
|
||||
if not rev.deleted.text:
|
||||
|
||||
if self.persist != PersistMethod.legacy:
|
||||
_, tokens_added, tokens_removed = state.update(rev.text, rev.id)
|
||||
|
||||
else:
|
||||
_, tokens_added, tokens_removed = state.process(rev.text, rev.id, text_sha1)
|
||||
|
||||
window.append((rev.id, rev_data, tokens_added, tokens_removed))
|
||||
|
||||
if len(window) == PERSISTENCE_RADIUS:
|
||||
old_rev_id, old_rev_data, old_tokens_added, old_tokens_removed = window[0]
|
||||
|
||||
num_token_revs, num_tokens = calculate_persistence(old_tokens_added)
|
||||
|
||||
old_rev_data.token_revs = num_token_revs
|
||||
old_rev_data.tokens_added = num_tokens
|
||||
old_rev_data.tokens_removed = len(old_tokens_removed)
|
||||
old_rev_data.tokens_window = PERSISTENCE_RADIUS - 1
|
||||
|
||||
self.print_rev_data(old_rev_data)
|
||||
|
||||
else:
|
||||
self.print_rev_data(rev_data)
|
||||
|
||||
rev_count += 1
|
||||
|
||||
# Get the last revision in the edit session.
|
||||
rev = revs[-1]
|
||||
regex_dict = self.matchmake_revision(rev)
|
||||
for k, v in regex_dict.items():
|
||||
if regex_matches.get(k) is None:
|
||||
regex_matches[k] = []
|
||||
regex_matches[k].append(v)
|
||||
|
||||
# Collect the set of pages currently buffered in the table so we can run multi-page functions on them.
|
||||
row_buffer = table.pop()
|
||||
|
||||
is_revert_column: list[Union[bool, None]] = []
|
||||
for r, d in zip(row_buffer['reverteds'], row_buffer['deleted']):
|
||||
if self.revert_radius == 0 or d:
|
||||
is_revert_column.append(None)
|
||||
else:
|
||||
is_revert_column.append(r is not None)
|
||||
|
||||
row_buffer['revert'] = is_revert_column
|
||||
|
||||
for k, v in regex_matches.items():
|
||||
row_buffer[k] = v
|
||||
regex_matches = {}
|
||||
|
||||
if self.persist != PersistMethod.none:
|
||||
window = deque(maxlen=PERSISTENCE_RADIUS)
|
||||
|
||||
row_buffer['token_revs'] = []
|
||||
row_buffer['tokens_added'] = []
|
||||
row_buffer['tokens_removed'] = []
|
||||
row_buffer['tokens_window'] = []
|
||||
|
||||
if self.persist == PersistMethod.sequence:
|
||||
state = mwpersistence.DiffState(SequenceMatcher(tokenizer=wikitext_split),
|
||||
revert_radius=PERSISTENCE_RADIUS)
|
||||
elif self.persist == PersistMethod.segment:
|
||||
state = mwpersistence.DiffState(SegmentMatcher(tokenizer=wikitext_split),
|
||||
revert_radius=PERSISTENCE_RADIUS)
|
||||
else:
|
||||
from mw.lib import persistence
|
||||
state = persistence.State()
|
||||
|
||||
for idx, text in enumerate(row_buffer['text']):
|
||||
rev_id = row_buffer['revid'][idx]
|
||||
if self.persist != PersistMethod.legacy:
|
||||
_, tokens_added, tokens_removed = state.update(text, rev_id)
|
||||
else:
|
||||
_, tokens_added, tokens_removed = state.process(text, rev_id)
|
||||
|
||||
window.append((rev_id, tokens_added, tokens_removed))
|
||||
|
||||
if len(window) == PERSISTENCE_RADIUS:
|
||||
old_rev_id, old_tokens_added, old_tokens_removed = window.popleft()
|
||||
num_token_revs, num_tokens = calculate_persistence(old_tokens_added)
|
||||
|
||||
row_buffer['token_revs'].append(num_token_revs)
|
||||
row_buffer['tokens_added'].append(num_tokens)
|
||||
row_buffer['tokens_removed'].append(len(old_tokens_removed))
|
||||
row_buffer['tokens_window'].append(PERSISTENCE_RADIUS - 1)
|
||||
|
||||
del row_buffer['text']
|
||||
|
||||
# print out metadata for the last RADIUS revisions
|
||||
for i, item in enumerate(window):
|
||||
# if the window was full, we've already printed item 0
|
||||
if len(window) == PERSISTENCE_RADIUS and i == 0:
|
||||
continue
|
||||
|
||||
rev_id, rev_data, tokens_added, tokens_removed = item
|
||||
rev_id, tokens_added, tokens_removed = item
|
||||
num_token_revs, num_tokens = calculate_persistence(tokens_added)
|
||||
|
||||
rev_data.token_revs = num_token_revs
|
||||
rev_data.tokens_added = num_tokens
|
||||
rev_data.tokens_removed = len(tokens_removed)
|
||||
rev_data.tokens_window = len(window) - (i + 1)
|
||||
self.print_rev_data(rev_data)
|
||||
row_buffer['token_revs'].append(num_token_revs)
|
||||
row_buffer['tokens_added'].append(num_tokens)
|
||||
row_buffer['tokens_removed'].append(len(tokens_removed))
|
||||
row_buffer['tokens_window'].append(len(window) - (i + 1))
|
||||
|
||||
writer.write(pa.table(row_buffer, schema=schema))
|
||||
|
||||
page_count += 1
|
||||
|
||||
print("Done: %s revisions and %s pages." % (rev_count, page_count),
|
||||
file=sys.stderr)
|
||||
|
||||
# remember to flush the parquet_buffer if we're done
|
||||
if self.output_parquet is True:
|
||||
self.flush_parquet_buffer()
|
||||
self.pq_writer.close()
|
||||
writer.close()
|
||||
|
||||
else:
|
||||
self.output_file.close()
|
||||
|
||||
"""
|
||||
For performance reasons it's better to write parquet in batches instead of one row at a time.
|
||||
So this function just puts the data on a buffer. If the buffer is full, then it gets flushed (written).
|
||||
"""
|
||||
|
||||
def write_parquet_row(self, rev_data):
|
||||
padata = rev_data.to_pyarrow()
|
||||
self.parquet_buffer.append(padata)
|
||||
|
||||
if len(self.parquet_buffer) >= self.parquet_buffer_size:
|
||||
self.flush_parquet_buffer()
|
||||
|
||||
"""
|
||||
Function that actually writes data to the parquet file.
|
||||
It needs to transpose the data from row-by-row to column-by-column
|
||||
"""
|
||||
|
||||
def flush_parquet_buffer(self):
|
||||
|
||||
"""
|
||||
Returns the pyarrow table that we'll write
|
||||
"""
|
||||
|
||||
def rows_to_table(rg, schema):
|
||||
cols = []
|
||||
first = rg[0]
|
||||
for col in first:
|
||||
cols.append([col])
|
||||
|
||||
for row in rg[1:]:
|
||||
for j in range(len(cols)):
|
||||
cols[j].append(row[j])
|
||||
|
||||
arrays = []
|
||||
for col, typ in zip(cols, schema.types):
|
||||
arrays.append(pa.array(col, typ))
|
||||
return pa.Table.from_arrays(arrays, schema=schema)
|
||||
|
||||
outtable = rows_to_table(self.parquet_buffer, self.schema)
|
||||
if self.pq_writer is None:
|
||||
self.pq_writer = pq.ParquetWriter(self.output_file, self.schema, flavor='spark')
|
||||
|
||||
self.pq_writer.write_table(outtable)
|
||||
self.parquet_buffer = []
|
||||
|
||||
# depending on if we are configured to write tsv or parquet, we'll call a different function.
|
||||
def print_rev_data(self, rev_data):
|
||||
if self.output_parquet is False:
|
||||
printfunc = self.write_tsv_row
|
||||
else:
|
||||
printfunc = self.write_parquet_row
|
||||
|
||||
printfunc(rev_data)
|
||||
|
||||
def write_tsv_row(self, rev_data):
|
||||
if self.print_header:
|
||||
print(rev_data.header_row(), file=self.output_file)
|
||||
self.print_header = False
|
||||
|
||||
line = rev_data.to_tsv_row()
|
||||
print(line, file=self.output_file)
|
||||
|
||||
def match_archive_suffix(input_filename):
|
||||
if re.match(r'.*\.7z$', input_filename):
|
||||
@ -718,9 +497,9 @@ def match_archive_suffix(input_filename):
|
||||
raise ValueError("Unrecognized file type: %s" % input_filename)
|
||||
return cmd
|
||||
|
||||
|
||||
def open_input_file(input_filename, fandom_2020=False):
|
||||
cmd = match_archive_suffix(input_filename)
|
||||
|
||||
if fandom_2020:
|
||||
cmd.append("*.xml")
|
||||
try:
|
||||
@ -729,7 +508,7 @@ def open_input_file(input_filename, fandom_2020=False):
|
||||
return open(input_filename, 'r')
|
||||
|
||||
|
||||
def get_output_filename(input_filename, parquet=False):
|
||||
def get_output_filename(input_filename, parquet=False) -> str:
|
||||
output_filename = re.sub(r'\.(7z|gz|bz2)?$', '', input_filename)
|
||||
output_filename = re.sub(r'\.xml', '', output_filename)
|
||||
if parquet is False:
|
||||
@ -753,7 +532,7 @@ def main():
|
||||
parser.add_argument('dumpfiles', metavar="DUMPFILE", nargs="*", type=str,
|
||||
help="Filename of the compressed or uncompressed XML database dump. If absent, we'll look for content on stdin and output on stdout.")
|
||||
|
||||
parser.add_argument('-o', '--output-dir', metavar='DIR', dest='output_dir', type=str, nargs=1,
|
||||
parser.add_argument('-o', '--output', metavar='OUTPUT', dest='output', type=str, nargs=1,
|
||||
help="Directory for output files. If it ends with .parquet output will be in parquet format.")
|
||||
|
||||
parser.add_argument('-s', '--stdout', dest="stdout", action="store_true",
|
||||
@ -766,9 +545,6 @@ def main():
|
||||
choices=['', 'segment', 'sequence', 'legacy'], nargs='?',
|
||||
help="Compute and report measures of content persistent: (1) persistent token revisions, (2) tokens added, and (3) number of revision used in computing the first measure. This may by slow. The default is -p=sequence, which uses the same algorithm as in the past, but with improvements to wikitext parsing. Use -p=legacy for old behavior used in older research projects. Use -p=segment for advanced persistence calculation method that is robust to content moves, but prone to bugs, and slower.")
|
||||
|
||||
parser.add_argument('-u', '--url-encode', dest="urlencode", action="store_true",
|
||||
help="Output url encoded text strings. This works around some data issues like newlines in editor names. In the future it may be used to output other text data.")
|
||||
|
||||
parser.add_argument('-n', '--namespace-include', dest="namespace_filter", type=int, action='append',
|
||||
help="Id number of namespace to include. Can be specified more than once.")
|
||||
|
||||
@ -818,32 +594,32 @@ def main():
|
||||
namespaces = None
|
||||
|
||||
if len(args.dumpfiles) > 0:
|
||||
output_parquet = False
|
||||
for filename in args.dumpfiles:
|
||||
input_file = open_input_file(filename, args.fandom_2020)
|
||||
|
||||
# open directory for output
|
||||
if args.output_dir:
|
||||
output_dir = args.output_dir[0]
|
||||
if args.output:
|
||||
output = args.output[0]
|
||||
else:
|
||||
output_dir = "."
|
||||
output = "."
|
||||
|
||||
if output_dir.endswith(".parquet"):
|
||||
output_parquet = True
|
||||
output_parquet = output.endswith(".parquet")
|
||||
|
||||
print("Processing file: %s" % filename, file=sys.stderr)
|
||||
|
||||
if args.stdout:
|
||||
output_file = sys.stdout
|
||||
else:
|
||||
filename = os.path.join(output_dir, os.path.basename(filename))
|
||||
# Parquet libraries need a binary output, so just sys.stdout doesn't work.
|
||||
output_file = sys.stdout.buffer
|
||||
elif os.path.isdir(output) or output_parquet:
|
||||
filename = os.path.join(output, os.path.basename(filename))
|
||||
output_file = get_output_filename(filename, parquet=output_parquet)
|
||||
else:
|
||||
output_file = output
|
||||
|
||||
wikiq = WikiqParser(input_file,
|
||||
output_file,
|
||||
collapse_user=args.collapse_user,
|
||||
persist=persist,
|
||||
urlencode=args.urlencode,
|
||||
namespaces=namespaces,
|
||||
revert_radius=args.revert_radius,
|
||||
regex_match_revision=args.regex_match_revision,
|
||||
@ -863,7 +639,6 @@ def main():
|
||||
collapse_user=args.collapse_user,
|
||||
persist=persist,
|
||||
# persist_legacy=args.persist_legacy,
|
||||
urlencode=args.urlencode,
|
||||
namespaces=namespaces,
|
||||
revert_radius=args.revert_radius,
|
||||
regex_match_revision=args.regex_match_revision,
|
||||
|
Loading…
Reference in New Issue
Block a user
Let's note that when the collapse-revs behavior isn't enabled, we're only passing lists of one revision.