Compare commits

66 Commits

master...parquet_su
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 0d9ab003f0 | ||
|  | 4bbed4a196 | ||
|  | 11d2587471 | ||
|  | 586ae85c65 | ||
|  | 390499dd90 | ||
|  | 84d464ea38 | ||
|  | 3e8ae205e8 | ||
|  | 8c707f5ef3 | ||
|  | b50c51a215 | ||
|  | 89465b29f4 | ||
|  | 17c7f208ab | ||
|  | 123b9a18a8 | ||
|  | 06a784ef27 | ||
|  | 8b0f775610 | ||
|  | f916af9836 | ||
|  | 9ee5ecfc91 | ||
|  | f9383440a0 | ||
|  | 032fec3198 | ||
|  | 0d56267ae0 | ||
|  | 260e2b177c | ||
|  | a13d7f1deb | ||
|  | ffbd180001 | ||
|  | 606a399450 | ||
|  | a9f76a0f62 | ||
|  | f39ceefa4a | ||
|  | 13ee160708 | ||
|  | bd22d26291 | ||
|  | 4dde25c508 | ||
|  | aec6e5fafa | ||
|  | c0e629a313 | ||
|  | 9009bb6fa4 | ||
|  | ab280dd765 | ||
|  | 22d14dc5f2 | ||
|  | 5a10f59dc4 | ||
|  | b8cdc82fc2 | ||
|  | 2a2b611d79 | ||
|  | 39fec0820d | ||
|  | 383ee03250 | ||
|  | 15e9234903 | ||
|  | 8c7d46472f | ||
|  | 3c7fb088d6 | ||
|  | ee01ce3e61 | ||
|  | 52757a8239 | ||
|  | d413443740 | ||
|  | 3f94144b1b | ||
|  | df0ad1de63 | ||
|  | f3e6cc9392 | ||
|  | c8b14c3303 | ||
|  | 4d3900b541 | ||
|  | ebc57864f2 | ||
|  | 3d0bf89938 | ||
|  | 6d133575c7 | ||
|  | 09a84e7d11 | ||
|  | 9c5bf577e6 | ||
|  | 4804ecc4b3 | ||
|  | 7a4c41159c | ||
|  | 1aea601a30 | ||
|  | c437b357db | ||
|  | bb83d62b74 | ||
|  | c285402683 | ||
|  | b1bea09ad6 | ||
|  | 9a0c157ebb | ||
|  | ae870fed0b | ||
|  | 26f6d8f984 | ||
|  | ae9a241747 | ||
|  | d8d20f670b | ||

.gitignore (vendored): 10 changed lines
							| @ -3,3 +3,13 @@ | ||||
| *.xml.bz2 | ||||
| *.xml.xz | ||||
| *.swp | ||||
| 
 | ||||
| # Lockfiles | ||||
| uv.lock | ||||
| 
 | ||||
| # JetBrains | ||||
| /.idea | ||||
| 
 | ||||
| # Python build and test output | ||||
| __pycache__/ | ||||
| /test/test_output/ | ||||
|  | ||||
							
								
								
									
.python-version (Normal file): 1 changed line

							| @ -0,0 +1 @@ | ||||
| 3.9 | ||||
							
								
								
									
README.rst: 19 changed lines

							| @ -11,3 +11,22 @@ submodule like:: | ||||
| 
 | ||||
| Wikimedia dumps are usually in a compressed format such as 7z (most common), gz, or bz2. Wikiq uses your computer's compression software to read these files. Therefore wikiq depends on | ||||
| `7za`, `gzcat`, and `zcat`.  | ||||
| 
 | ||||
| Dependencies | ||||
| ---------------- | ||||
| These non-Python dependencies must be installed on your system for wikiq and its | ||||
| associated tests to work. | ||||
| 
 | ||||
| - 7zip | ||||
| - ffmpeg | ||||
| 
 | ||||
| Tests | ||||
| ----- | ||||
| To run tests:: | ||||
| 
 | ||||
|    python -m unittest test.Wikiq_Unit_Test | ||||
| 
 | ||||
| TODO: | ||||
| _______________ | ||||
| 1. [] Output metadata about the run. What parameters were used? What versions of deltas? | ||||
| 2. [] Url encoding by default | ||||
|  | ||||
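
The README additions name the compression tools wikiq shells out to (`7za`, `gzcat`, `zcat`) and the non-Python dependencies required for the project and its tests (7zip, ffmpeg). A small sketch, not part of this diff, for checking that the relevant executables are on the PATH before running the test suite:

```python
# Illustrative only: verify the external tools named in README.rst are installed.
import shutil

for tool in ("7za", "gzcat", "zcat", "ffmpeg"):
    path = shutil.which(tool)
    print(f"{tool}: {path or 'NOT FOUND'}")
```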
							
								
								
									
pyproject.toml (Normal file): 25 changed lines

							| @ -0,0 +1,25 @@ | ||||
| [project] | ||||
| name = "mediawiki-dump-tools" | ||||
| version = "0.1.0" | ||||
| description = "Add your description here" | ||||
| readme = "README.md" | ||||
| requires-python = "~=3.9" | ||||
| dependencies = [ | ||||
|     "deltas>=0.7.0", | ||||
|     "mediawiki-utilities>=0.4.18", | ||||
|     "mwpersistence>=0.2.4", | ||||
|     "mwreverts>=0.1.5", | ||||
|     "mwtypes>=0.4.0", | ||||
|     "mwxml>=0.3.6", | ||||
|     "pyarrow>=20.0.0", | ||||
|     "yamlconf>=0.2.6", | ||||
| ] | ||||
| 
 | ||||
| [tool.uv.sources] | ||||
| yamlconf = { git = "https://github.com/groceryheist/yamlconf" } | ||||
| mwxml = { git = "https://github.com/groceryheist/python-mwxml" } | ||||
| 
 | ||||
| [dependency-groups] | ||||
| dev = [ | ||||
|     "pandas>=2.1.0" | ||||
| ] | ||||
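
The new pyproject.toml pins the Python dependencies, with two git-sourced forks declared under [tool.uv.sources]. A quick, illustrative check (not part of this diff) that those distributions resolve in the active environment, using only the standard library:

```python
# Illustrative only: report installed versions of the distributions pinned in
# pyproject.toml, or flag any that are missing.
from importlib.metadata import PackageNotFoundError, version

for dist in ("deltas", "mediawiki-utilities", "mwpersistence", "mwreverts",
             "mwtypes", "mwxml", "pyarrow", "yamlconf"):
    try:
        print(f"{dist} {version(dist)}")
    except PackageNotFoundError:
        print(f"{dist}: not installed")
```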
							
								
								
									
tables.py (Normal file): 220 changed lines

							| @ -0,0 +1,220 @@ | ||||
| import sys | ||||
| from abc import abstractmethod, ABC | ||||
| from datetime import datetime, timezone | ||||
| from hashlib import sha1 | ||||
| from typing import Generic, TypeVar, Union | ||||
| 
 | ||||
| import mwreverts | ||||
| import mwtypes | ||||
| import mwxml | ||||
| 
 | ||||
| import pyarrow as pa | ||||
| 
 | ||||
| T = TypeVar('T') | ||||
| 
 | ||||
| 
 | ||||
| class RevisionField(ABC, Generic[T]): | ||||
|     """ | ||||
|     Abstract type which represents a field in a table of page revisions. | ||||
|     """ | ||||
| 
 | ||||
|     def __init__(self): | ||||
|         self.data: list[T] = [] | ||||
| 
 | ||||
|     @property | ||||
|     @abstractmethod | ||||
|     def field(self) -> pa.Field: | ||||
|         pass | ||||
| 
 | ||||
|     @abstractmethod | ||||
|     def extract(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> T: | ||||
|         """ | ||||
|         :param page: The page for this set of revisions. | ||||
|         :param revisions: The set of revisions to compute the field from. | ||||
|         Revisions are passed in chronological order, so use revisions[-1] to | ||||
|         access the most recent revision in the set. | ||||
| 
 | ||||
|         Implementations of extract should handle the case where revisions is | ||||
|         either a single revision (collapse-user=FALSE), or a full edit session | ||||
|         of contiguous edits by the same user (collapse-user=TRUE). | ||||
|         """ | ||||
|         pass | ||||
| 
 | ||||
|     def add(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> None: | ||||
|         self.data.append(self.extract(page, revisions)) | ||||
| 
 | ||||
|     def pop(self) -> list[T]: | ||||
|         data = self.data | ||||
|         self.data = [] | ||||
|         return data | ||||
| 
 | ||||
| 
 | ||||
| class RevisionTable: | ||||
|     columns: list[RevisionField] | ||||
| 
 | ||||
|     def __init__(self, columns: list[RevisionField]): | ||||
|         self.columns = columns | ||||
| 
 | ||||
|     def add(self, page: mwtypes.Page, revisions: list[mwxml.Revision]): | ||||
|         for column in self.columns: | ||||
|             column.add(page=page, revisions=revisions) | ||||
| 
 | ||||
|     def schema(self) -> pa.Schema: | ||||
|         return pa.schema([c.field for c in self.columns]) | ||||
| 
 | ||||
|     def pop(self) -> dict: | ||||
|         data = {} | ||||
|         for column in self.columns: | ||||
|             data[column.field.name] = column.pop() | ||||
| 
 | ||||
|         return data | ||||
| 
 | ||||
| 
 | ||||
| class RevisionId(RevisionField[int]): | ||||
|     field = pa.field("revid", pa.int64()) | ||||
| 
 | ||||
|     def extract(self, _: mwtypes.Page, revisions: list[mwxml.Revision]) -> int: | ||||
|         revision = revisions[-1] | ||||
|         return revision.id | ||||
| 
 | ||||
| 
 | ||||
| class RevisionTimestamp(RevisionField[datetime]): | ||||
|     field = pa.field("date_time", pa.timestamp('s')) | ||||
| 
 | ||||
|     def extract(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> datetime: | ||||
|         revision = revisions[-1] | ||||
|         return revision.timestamp | ||||
| 
 | ||||
| 
 | ||||
| class RevisionArticleId(RevisionField[int]): | ||||
|     field = pa.field("articleid", pa.int64()) | ||||
| 
 | ||||
|     def extract(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> int: | ||||
|         return page.id | ||||
| 
 | ||||
| 
 | ||||
| class RevisionEditorId(RevisionField[Union[int, None]]): | ||||
|     field = pa.field("editorid", pa.int64(), nullable=True) | ||||
| 
 | ||||
|     def extract(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> Union[int, None]: | ||||
|         revision = revisions[-1] | ||||
|         if revision.deleted.user: | ||||
|             return None | ||||
| 
 | ||||
|         return revision.user.id | ||||
| 
 | ||||
| 
 | ||||
| class RevisionEditSummary(RevisionField[Union[str, None]]): | ||||
|     field = pa.field("edit_summary", pa.string(), nullable=True) | ||||
| 
 | ||||
|     def extract(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> Union[str, None]: | ||||
|         revision = revisions[-1] | ||||
| 
 | ||||
|         return revision.comment | ||||
| 
 | ||||
| class RevisionIsAnon(RevisionField[Union[bool, None]]): | ||||
|     field = pa.field("anon", pa.bool_(), nullable=True) | ||||
| 
 | ||||
|     def extract(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> Union[bool, None]: | ||||
|         revision = revisions[-1] | ||||
|         if revision.deleted.user: | ||||
|             return None | ||||
| 
 | ||||
|         return revision.user.id is None | ||||
| 
 | ||||
| 
 | ||||
| class RevisionEditorText(RevisionField[Union[str, None]]): | ||||
|     field = pa.field("editor", pa.string(), nullable=True) | ||||
| 
 | ||||
|     def extract(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> Union[str, None]: | ||||
|         revision = revisions[-1] | ||||
|         if revision.deleted.user: | ||||
|             return None | ||||
| 
 | ||||
|         return revision.user.text | ||||
| 
 | ||||
| 
 | ||||
| class RevisionPageTitle(RevisionField[str]): | ||||
|     field = pa.field("title", pa.string()) | ||||
| 
 | ||||
|     def extract(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> str: | ||||
|         return page.title | ||||
| 
 | ||||
| 
 | ||||
| class RevisionDeleted(RevisionField[bool]): | ||||
|     field = pa.field("deleted", pa.bool_()) | ||||
| 
 | ||||
|     def extract(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> bool: | ||||
|         revision = revisions[-1] | ||||
|         return revision.deleted.text | ||||
| 
 | ||||
| 
 | ||||
| class RevisionNamespace(RevisionField[int]): | ||||
|     field = pa.field("namespace", pa.int32()) | ||||
| 
 | ||||
|     def extract(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> int: | ||||
|         return page.namespace | ||||
| 
 | ||||
| 
 | ||||
| class RevisionSha1(RevisionField[str]): | ||||
|     field = pa.field("sha1", pa.string()) | ||||
| 
 | ||||
|     def extract(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> str: | ||||
|         revision = revisions[-1] | ||||
|         return revision.sha1 | ||||
| 
 | ||||
| 
 | ||||
| class RevisionTextChars(RevisionField[Union[int, None]]): | ||||
|     field = pa.field("text_chars", pa.int32(), nullable=True) | ||||
| 
 | ||||
|     def extract(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> Union[int, None]: | ||||
|         revision = revisions[-1] | ||||
|         if not revision.deleted.text: | ||||
|             return len(revision.text) | ||||
| 
 | ||||
|         return None | ||||
| 
 | ||||
| 
 | ||||
| class RevisionText(RevisionField[str]): | ||||
|     field = pa.field("text", pa.string()) | ||||
| 
 | ||||
|     def extract(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> str: | ||||
|         revision = revisions[-1] | ||||
|         return revision.text | ||||
| 
 | ||||
| 
 | ||||
| class RevisionIsMinor(RevisionField[bool]): | ||||
|     field = pa.field("minor", pa.bool_()) | ||||
| 
 | ||||
|     def extract(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> bool: | ||||
|         revision = revisions[-1] | ||||
|         return revision.minor | ||||
| 
 | ||||
| 
 | ||||
| class RevisionReverts(RevisionField[Union[str, None]]): | ||||
|     def __init__(self): | ||||
|         super().__init__() | ||||
|         self.rev_detector: Union[mwreverts.Detector, None] = None | ||||
| 
 | ||||
|     field = pa.field("reverteds", pa.string(), nullable=True) | ||||
| 
 | ||||
|     def extract(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> Union[str, None]: | ||||
|         if self.rev_detector is None: | ||||
|             return None | ||||
| 
 | ||||
|         revision = revisions[-1] | ||||
|         if revision.deleted.text: | ||||
|             return None | ||||
| 
 | ||||
|         revert = self.rev_detector.process(revision.sha1, revision.id) | ||||
|         if revert is None: | ||||
|             return None | ||||
| 
 | ||||
|         return ",".join([str(s) for s in revert.reverteds]) | ||||
| 
 | ||||
| 
 | ||||
| class RevisionCollapsed(RevisionField[int]): | ||||
|     field = pa.field("collapsed_revs", pa.int64()) | ||||
| 
 | ||||
|     def extract(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> int: | ||||
|         return len(revisions) | ||||
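
The new tables.py above defines the column abstraction behind the parquet output. Below is a minimal sketch, not part of this diff, of how a RevisionTable could be turned into a pyarrow Table. It assumes tables.py is importable from the working directory and uses hypothetical SimpleNamespace stand-ins in place of the real mwtypes.Page and mwxml.Revision objects wikiq would supply, providing only the attributes these three fields read.

```python
# Sketch only: build a tiny Arrow table from fake page/revision stand-ins.
from types import SimpleNamespace

import pyarrow as pa

from tables import RevisionArticleId, RevisionId, RevisionPageTitle, RevisionTable

page = SimpleNamespace(id=1, title="Example page", namespace=0)
revision = SimpleNamespace(id=42)

table = RevisionTable([RevisionId(), RevisionArticleId(), RevisionPageTitle()])
table.add(page, [revision])  # one (page, revision-batch) pair
arrow_table = pa.table(table.pop(), schema=table.schema())
print(arrow_table)
```

Because add() always receives a list of revisions, the same code path serves both modes described in the RevisionField docstring: a single-revision list when collapse-user is off, or a user's full contiguous edit session when it is on.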
| @ -1,398 +1,365 @@ | ||||
| import shutil | ||||
| import sys | ||||
| import unittest | ||||
| import os | ||||
| import subprocess | ||||
| from shutil import copyfile | ||||
| 
 | ||||
| import numpy as np | ||||
| import pandas as pd | ||||
| from pandas.util.testing import assert_frame_equal | ||||
| from pandas import DataFrame | ||||
| from pandas.testing import assert_frame_equal, assert_series_equal | ||||
| from io import StringIO | ||||
| import tracemalloc | ||||
| from typing import Final, Union | ||||
| 
 | ||||
| # Make references to files and wikiq relative to this file, not to the current working directory. | ||||
| TEST_DIR: Final[str] = os.path.dirname(os.path.realpath(__file__)) | ||||
| WIKIQ: Final[str] = os.path.join(os.path.dirname(TEST_DIR), "wikiq") | ||||
| TEST_OUTPUT_DIR: Final[str] = os.path.join(TEST_DIR, "test_output") | ||||
| BASELINE_DIR: Final[str] = os.path.join(TEST_DIR, "baseline_output") | ||||
| 
 | ||||
| IKWIKI: Final[str] = "ikwiki-20180301-pages-meta-history" | ||||
| SAILORMOON: Final[str] = "sailormoon" | ||||
| TWINPEAKS: Final[str] = "twinpeaks" | ||||
| REGEXTEST: Final[str] = "regextest" | ||||
| 
 | ||||
| 
 | ||||
| def setup(): | ||||
|     tracemalloc.start() | ||||
| 
 | ||||
|     # Perform directory check and reset here as this is a one-time setup step as opposed to per-test setup. | ||||
|     if not os.path.exists(TEST_OUTPUT_DIR): | ||||
|         os.mkdir(TEST_OUTPUT_DIR) | ||||
| 
 | ||||
| 
 | ||||
| # Always run setup, even if this is executed via "python -m unittest" rather | ||||
| # than as __main__. | ||||
| setup() | ||||
| 
 | ||||
| 
 | ||||
| class WikiqTester: | ||||
|     def __init__(self, | ||||
|                  wiki: str, | ||||
|                  case_name: str, | ||||
|                  suffix: Union[str, None] = None, | ||||
|                  in_compression: str = "bz2", | ||||
|                  baseline_format: str = "tsv", | ||||
|                  out_format: str = "tsv", | ||||
|                  ): | ||||
|         self.input_file = os.path.join(TEST_DIR, "dumps", "{0}.xml.{1}".format(wiki, in_compression)) | ||||
| 
 | ||||
|         basename = "{0}_{1}".format(case_name, wiki) | ||||
|         if suffix: | ||||
|             basename = "{0}_{1}".format(basename, suffix) | ||||
| 
 | ||||
|         self.output = os.path.join(TEST_OUTPUT_DIR, "{0}.{1}".format(basename, out_format)) | ||||
| 
 | ||||
|         if os.path.exists(self.output): | ||||
|             if os.path.isfile(self.output): | ||||
|                 os.remove(self.output) | ||||
|             else: | ||||
|                 shutil.rmtree(self.output) | ||||
| 
 | ||||
|         if out_format == "parquet": | ||||
|             os.makedirs(self.output, exist_ok=True) | ||||
| 
 | ||||
|         if suffix is None: | ||||
|             self.wikiq_baseline_name = "{0}.{1}".format(wiki, baseline_format) | ||||
|             self.wikiq_out_name = "{0}.{1}".format(wiki, out_format) | ||||
|         else: | ||||
|             self.wikiq_baseline_name = "{0}_{1}.{2}".format(wiki, suffix, baseline_format) | ||||
|             self.wikiq_out_name = "{0}_{1}.{2}".format(wiki, suffix, out_format) | ||||
| 
 | ||||
|         # If case_name is unset, there are no relevant baseline or test files. | ||||
|         if case_name is not None: | ||||
|             self.baseline_file = os.path.join(BASELINE_DIR, "{0}_{1}".format(case_name, self.wikiq_baseline_name)) | ||||
| 
 | ||||
|     def call_wikiq(self, *args: str, out: bool = True): | ||||
|         """ | ||||
|         Calls wikiq with the passed arguments on the input file relevant to the test. | ||||
|         :param args: The command line arguments to pass to wikiq. | ||||
|         :param out: Whether to pass an output argument to wikiq. | ||||
|         :return: The output of the wikiq call. | ||||
|         """ | ||||
|         if out: | ||||
|             call = ' '.join([WIKIQ, self.input_file, "-o", self.output, *args]) | ||||
|         else: | ||||
|             call = ' '.join([WIKIQ, self.input_file, *args]) | ||||
| 
 | ||||
|         print(call) | ||||
|         return subprocess.check_output(call, stderr=subprocess.PIPE, shell=True) | ||||
| 
 | ||||
| 
 | ||||
| # with / without pwr DONE | ||||
| # with / without url encode DONE | ||||
| # with / without collapse user DONE | ||||
| # with output to sdtout DONE | ||||
| # with output to stdout DONE | ||||
| # note that the persistence radius is 7 by default | ||||
| # reading various file formats including | ||||
| #        7z, gz, bz2, xml  DONE | ||||
| # wikia and wikipedia data DONE | ||||
| # malformed xmls DONE | ||||
| 
 | ||||
| class Test_Wikipedia(unittest.TestCase): | ||||
|     def setUp(self): | ||||
|         if not os.path.exists("test_output"): | ||||
|             os.mkdir("test_output") | ||||
| class WikiqTestCase(unittest.TestCase): | ||||
|     def test_WP_noargs(self): | ||||
|         tester = WikiqTester(IKWIKI, "noargs") | ||||
| 
 | ||||
|         self.wiki = 'ikwiki-20180301-pages-meta-history' | ||||
|         self.wikiq_out_name =  self.wiki + ".tsv" | ||||
|         self.test_output_dir = os.path.join(".", "test_output") | ||||
|         self.call_output = os.path.join(self.test_output_dir, self.wikiq_out_name) | ||||
|         try: | ||||
|             tester.call_wikiq() | ||||
|         except subprocess.CalledProcessError as exc: | ||||
|             self.fail(exc.stderr.decode("utf8")) | ||||
| 
 | ||||
|         self.infile = "{0}.xml.bz2".format(self.wiki)     | ||||
|         self.base_call = "../wikiq {0} -o {1}" | ||||
|         self.input_dir = "dumps" | ||||
|         self.input_file = os.path.join(".", self.input_dir,self.infile) | ||||
|         self.baseline_output_dir = "baseline_output" | ||||
| 
 | ||||
|     def test_WP_url_encode(self): | ||||
|         test_filename =  "url-encode_" + self.wikiq_out_name | ||||
|         test_file = os.path.join(self.test_output_dir, test_filename) | ||||
|         if os.path.exists(test_file): | ||||
|             os.remove(test_file) | ||||
|          | ||||
|         call = self.base_call.format(self.input_file, self.test_output_dir) | ||||
|         call = call + " --url-encode" | ||||
|         proc = subprocess.Popen(call,stdout=subprocess.PIPE,shell=True) | ||||
|         proc.wait() | ||||
| 
 | ||||
|         copyfile(self.call_output, test_file) | ||||
|         baseline_file = os.path.join(".", self.baseline_output_dir, test_filename) | ||||
| 
 | ||||
|         # as a test let's make sure that we get equal data frames | ||||
|         test = pd.read_table(test_file) | ||||
|         baseline = pd.read_table(baseline_file) | ||||
|         assert_frame_equal(test,baseline) | ||||
|         test = pd.read_table(tester.output) | ||||
|         baseline = pd.read_table(tester.baseline_file) | ||||
|         assert_frame_equal(test, baseline, check_like=True) | ||||
| 
 | ||||
|     def test_WP_namespaces(self): | ||||
|         print(os.path.abspath('.')) | ||||
|         test_filename =  "namespaces_" + self.wikiq_out_name | ||||
|         test_file = os.path.join(self.test_output_dir, test_filename) | ||||
|         if os.path.exists(test_file): | ||||
|             os.remove(test_file) | ||||
|          | ||||
|         call = self.base_call.format(self.input_file, self.test_output_dir) | ||||
|         call = call + " -n 0 -n 1" | ||||
|         print(call) | ||||
|         proc = subprocess.Popen(call,stdout=subprocess.PIPE,shell=True) | ||||
|         proc.wait() | ||||
|         copyfile(self.call_output, test_file) | ||||
|         baseline_file = os.path.join(os.path.abspath("."), self.baseline_output_dir, test_filename) | ||||
|         tester = WikiqTester(IKWIKI, "namespaces") | ||||
| 
 | ||||
|         try: | ||||
|             tester.call_wikiq("-n 0", "-n 1") | ||||
|         except subprocess.CalledProcessError as exc: | ||||
|             self.fail(exc.stderr.decode("utf8")) | ||||
| 
 | ||||
|         # as a test let's make sure that we get equal data frames | ||||
|         test = pd.read_table(test_file) | ||||
|         num_wrong_ns = sum(~ test.namespace.isin({0,1})) | ||||
|         test = pd.read_table(tester.output) | ||||
|         num_wrong_ns = sum(~ test.namespace.isin({0, 1})) | ||||
|         self.assertEqual(num_wrong_ns, 0) | ||||
|         baseline = pd.read_table(baseline_file) | ||||
|         assert_frame_equal(test,baseline) | ||||
|         baseline = pd.read_table(tester.baseline_file) | ||||
|         assert_frame_equal(test, baseline, check_like=True) | ||||
| 
 | ||||
|     def test_WP_revert_radius(self): | ||||
|         print(os.path.abspath('.')) | ||||
|         test_filename =  "revert_radius_" + self.wikiq_out_name | ||||
|         test_file = os.path.join(self.test_output_dir, test_filename) | ||||
|         if os.path.exists(test_file): | ||||
|             os.remove(test_file) | ||||
|          | ||||
|         call = self.base_call.format(self.input_file, self.test_output_dir) | ||||
|         call = call + " -n 0 -n 1 -rr 1" | ||||
|         print(call) | ||||
|         proc = subprocess.Popen(call,stdout=subprocess.PIPE,shell=True) | ||||
|         proc.wait() | ||||
|         copyfile(self.call_output, test_file) | ||||
|         baseline_file = os.path.join(os.path.abspath("."), self.baseline_output_dir, test_filename) | ||||
|         tester = WikiqTester(IKWIKI, "revert_radius") | ||||
| 
 | ||||
|         try: | ||||
|             tester.call_wikiq("-n 0", "-n 1", "-rr 1") | ||||
|         except subprocess.CalledProcessError as exc: | ||||
|             self.fail(exc.stderr.decode("utf8")) | ||||
| 
 | ||||
|         # as a test let's make sure that we get equal data frames | ||||
|         test = pd.read_table(test_file) | ||||
|         num_wrong_ns = sum(~ test.namespace.isin({0,1})) | ||||
|         test = pd.read_table(tester.output) | ||||
|         num_wrong_ns = sum(~ test.namespace.isin({0, 1})) | ||||
|         self.assertEqual(num_wrong_ns, 0) | ||||
|         baseline = pd.read_table(baseline_file) | ||||
|         assert_frame_equal(test,baseline) | ||||
|         baseline = pd.read_table(tester.baseline_file) | ||||
|         assert_frame_equal(test, baseline, check_like=True) | ||||
| 
 | ||||
|     def test_WP_no_revert_radius(self): | ||||
|         tester = WikiqTester(IKWIKI, "no_revert_radius") | ||||
| 
 | ||||
|         try: | ||||
|             tester.call_wikiq("-rr 0") | ||||
|         except subprocess.CalledProcessError as exc: | ||||
|             self.fail(exc.stderr.decode("utf8")) | ||||
| 
 | ||||
| class Test_Basic(unittest.TestCase): | ||||
|         # as a test let's make sure that we get equal data frames | ||||
|         test = pd.read_table(tester.output) | ||||
|         num_reverted = sum(i is None for i in test.revert) | ||||
|         self.assertEqual(num_reverted, 0) | ||||
|         baseline = pd.read_table(tester.baseline_file) | ||||
|         assert_frame_equal(test, baseline, check_like=True) | ||||
| 
 | ||||
|     def setUp(self): | ||||
|         if not os.path.exists("test_output"): | ||||
|             os.mkdir("test_output") | ||||
|     def test_WP_collapse_user(self): | ||||
|         tester = WikiqTester(IKWIKI, "collapse_user") | ||||
| 
 | ||||
|         self.wiki = 'sailormoon' | ||||
|         self.wikiq_out_name =  self.wiki + ".tsv" | ||||
|         self.test_output_dir = os.path.join(".", "test_output") | ||||
|         self.call_output = os.path.join(self.test_output_dir, self.wikiq_out_name) | ||||
|         try: | ||||
|             tester.call_wikiq("--collapse-user") | ||||
|         except subprocess.CalledProcessError as exc: | ||||
|             self.fail(exc.stderr.decode("utf8")) | ||||
| 
 | ||||
|         self.infile = "{0}.xml.7z".format(self.wiki) | ||||
|         self.base_call = "../wikiq {0} -o {1}" | ||||
|         self.input_dir = "dumps" | ||||
|         self.input_file = os.path.join(".", self.input_dir,self.infile) | ||||
|         self.baseline_output_dir = "baseline_output" | ||||
|         test = pd.read_table(tester.output) | ||||
|         baseline = pd.read_table(tester.baseline_file) | ||||
|         assert_frame_equal(test, baseline, check_like=True) | ||||
| 
 | ||||
|     def test_noargs(self): | ||||
|         tester = WikiqTester(SAILORMOON, "noargs", in_compression="7z") | ||||
| 
 | ||||
|         test_filename =  "noargs_" + self.wikiq_out_name | ||||
|         test_file = os.path.join(self.test_output_dir, test_filename) | ||||
|         if os.path.exists(test_file): | ||||
|             os.remove(test_file) | ||||
|          | ||||
|         call = self.base_call.format(self.input_file, self.test_output_dir) | ||||
|         proc = subprocess.Popen(call,stdout=subprocess.PIPE,shell=True) | ||||
|         proc.wait() | ||||
| 
 | ||||
|         copyfile(self.call_output, test_file) | ||||
| 
 | ||||
|         baseline_file = os.path.join(".", self.baseline_output_dir, test_filename) | ||||
| 
 | ||||
|         test = pd.read_table(test_file) | ||||
|         baseline = pd.read_table(baseline_file) | ||||
|         assert_frame_equal(test,baseline) | ||||
|         try: | ||||
|             tester.call_wikiq() | ||||
|         except subprocess.CalledProcessError as exc: | ||||
|             self.fail(exc.stderr.decode("utf8")) | ||||
| 
 | ||||
|         test = pd.read_table(tester.output) | ||||
|         baseline = pd.read_table(tester.baseline_file) | ||||
|         assert_frame_equal(test, baseline, check_like=True) | ||||
| 
 | ||||
|     def test_collapse_user(self): | ||||
|         test_filename =  "collapse-user_" + self.wikiq_out_name | ||||
|         test_file = os.path.join(self.test_output_dir, test_filename) | ||||
|         if os.path.exists(test_file): | ||||
|             os.remove(test_file) | ||||
|          | ||||
|         call = self.base_call.format(self.input_file, self.test_output_dir) | ||||
|         call = call + " --collapse-user" | ||||
|         tester = WikiqTester(SAILORMOON, "collapse-user", in_compression="7z") | ||||
| 
 | ||||
|         proc = subprocess.Popen(call,stdout=subprocess.PIPE,shell=True) | ||||
|         proc.wait() | ||||
|         try: | ||||
|             tester.call_wikiq("--collapse-user", "--fandom-2020") | ||||
|         except subprocess.CalledProcessError as exc: | ||||
|             self.fail(exc.stderr.decode("utf8")) | ||||
| 
 | ||||
|         copyfile(self.call_output, test_file) | ||||
| 
 | ||||
|         baseline_file = os.path.join(".", self.baseline_output_dir, test_filename) | ||||
|         test = pd.read_table(test_file) | ||||
|         baseline = pd.read_table(baseline_file) | ||||
|         assert_frame_equal(test,baseline) | ||||
|         test = pd.read_table(tester.output) | ||||
|         baseline = pd.read_table(tester.baseline_file) | ||||
|         assert_frame_equal(test, baseline, check_like=True) | ||||
| 
 | ||||
|     def test_pwr_segment(self): | ||||
|         test_filename =  "persistence_segment_" + self.wikiq_out_name | ||||
|         test_file = os.path.join(self.test_output_dir, test_filename) | ||||
|         if os.path.exists(test_file): | ||||
|             os.remove(test_file) | ||||
|          | ||||
|         call = self.base_call.format(self.input_file, self.test_output_dir) | ||||
|         call = call + " --persistence segment" | ||||
|         proc = subprocess.Popen(call,stdout=subprocess.PIPE,shell=True) | ||||
|         proc.wait() | ||||
|         tester = WikiqTester(SAILORMOON, "persistence_segment", in_compression="7z") | ||||
| 
 | ||||
|         try: | ||||
|             tester.call_wikiq("--persistence segment", "--fandom-2020") | ||||
|         except subprocess.CalledProcessError as exc: | ||||
|             self.fail(exc.stderr.decode("utf8")) | ||||
| 
 | ||||
|         copyfile(self.call_output, test_file) | ||||
| 
 | ||||
|         baseline_file = os.path.join(".", self.baseline_output_dir, test_filename) | ||||
| 
 | ||||
|         test = pd.read_table(test_file) | ||||
|         baseline = pd.read_table(baseline_file) | ||||
|         assert_frame_equal(test,baseline) | ||||
|         test = pd.read_table(tester.output) | ||||
|         baseline = pd.read_table(tester.baseline_file) | ||||
|         assert_frame_equal(test, baseline, check_like=True) | ||||
| 
 | ||||
|     def test_pwr_legacy(self): | ||||
|         test_filename =  "persistence_legacy_" + self.wikiq_out_name | ||||
|         test_file = os.path.join(self.test_output_dir, test_filename) | ||||
|         if os.path.exists(test_file): | ||||
|             os.remove(test_file) | ||||
|          | ||||
|         call = self.base_call.format(self.input_file, self.test_output_dir) | ||||
|         call = call + " --persistence legacy" | ||||
|         proc = subprocess.Popen(call,stdout=subprocess.PIPE,shell=True) | ||||
|         proc.wait() | ||||
|         tester = WikiqTester(SAILORMOON, "persistence_legacy", in_compression="7z") | ||||
| 
 | ||||
|         try: | ||||
|             tester.call_wikiq("--persistence legacy", "--fandom-2020") | ||||
|         except subprocess.CalledProcessError as exc: | ||||
|             self.fail(exc.stderr.decode("utf8")) | ||||
| 
 | ||||
|         copyfile(self.call_output, test_file) | ||||
| 
 | ||||
|         baseline_file = os.path.join(".", self.baseline_output_dir, test_filename) | ||||
| 
 | ||||
|         test = pd.read_table(test_file) | ||||
|         baseline = pd.read_table(baseline_file) | ||||
|         assert_frame_equal(test,baseline) | ||||
|         test = pd.read_table(tester.output) | ||||
|         baseline = pd.read_table(tester.baseline_file) | ||||
|         assert_frame_equal(test, baseline, check_like=True) | ||||
| 
 | ||||
|     def test_pwr(self): | ||||
|         test_filename =  "persistence_" + self.wikiq_out_name | ||||
|         test_file = os.path.join(self.test_output_dir, test_filename) | ||||
|         if os.path.exists(test_file):  | ||||
|            os.remove(test_file) | ||||
|          | ||||
|         call = self.base_call.format(self.input_file, self.test_output_dir) | ||||
|         call = call + " --persistence" | ||||
|         proc = subprocess.Popen(call,stdout=subprocess.PIPE,shell=True) | ||||
|         proc.wait() | ||||
|         tester = WikiqTester(SAILORMOON, "persistence", in_compression="7z") | ||||
| 
 | ||||
|         try: | ||||
|             tester.call_wikiq("--persistence", "--fandom-2020") | ||||
|         except subprocess.CalledProcessError as exc: | ||||
|             self.fail(exc.stderr.decode("utf8")) | ||||
| 
 | ||||
|         copyfile(self.call_output, test_file) | ||||
| 
 | ||||
|         baseline_file = os.path.join(".", self.baseline_output_dir, test_filename) | ||||
| 
 | ||||
|         test = pd.read_table(test_file) | ||||
|         baseline = pd.read_table(baseline_file) | ||||
|         assert_frame_equal(test,baseline) | ||||
| 
 | ||||
| 
 | ||||
|     def test_url_encode(self): | ||||
|         test_filename =  "url-encode_" + self.wikiq_out_name | ||||
| 
 | ||||
|         test_file = os.path.join(self.test_output_dir, test_filename) | ||||
|         if os.path.exists(test_file): | ||||
|             os.remove(test_file) | ||||
|          | ||||
|         call = self.base_call.format(self.input_file, self.test_output_dir) | ||||
|         call = call + " --url-encode" | ||||
|         proc = subprocess.Popen(call,stdout=subprocess.PIPE,shell=True) | ||||
|         proc.wait() | ||||
| 
 | ||||
|         copyfile(self.call_output, test_file) | ||||
|         baseline_file = os.path.join(".", self.baseline_output_dir, test_filename) | ||||
|         test = pd.read_table(test_file) | ||||
|         baseline = pd.read_table(baseline_file) | ||||
|         assert_frame_equal(test,baseline) | ||||
| 
 | ||||
| 
 | ||||
| class Test_Malformed(unittest.TestCase): | ||||
|     def setUp(self): | ||||
|         if not os.path.exists("test_output"): | ||||
|             os.mkdir("test_output") | ||||
| 
 | ||||
|         self.wiki = 'twinpeaks' | ||||
|         self.wikiq_out_name =  self.wiki + ".tsv" | ||||
|         self.test_output_dir = os.path.join(".", "test_output") | ||||
|         self.call_output = os.path.join(self.test_output_dir, self.wikiq_out_name) | ||||
| 
 | ||||
|         self.infile = "{0}.xml.7z".format(self.wiki) | ||||
|         self.base_call = "../wikiq {0} -o {1}" | ||||
|         self.input_dir = "dumps" | ||||
|         self.input_file = os.path.join(".", self.input_dir,self.infile) | ||||
|         test = pd.read_table(tester.output) | ||||
|         baseline = pd.read_table(tester.baseline_file) | ||||
| 
 | ||||
|         test = test.reindex(columns=sorted(test.columns)) | ||||
|         assert_frame_equal(test, baseline, check_like=True) | ||||
| 
 | ||||
|     def test_malformed_noargs(self): | ||||
|         tester = WikiqTester(wiki=TWINPEAKS, case_name="noargs", in_compression="7z") | ||||
|         want_exception = 'xml.etree.ElementTree.ParseError: no element found: line 1369, column 0' | ||||
| 
 | ||||
|         call = self.base_call.format(self.input_file, self.test_output_dir) | ||||
|         proc = subprocess.Popen(call,stdout=subprocess.PIPE,stderr=subprocess.PIPE, shell=True) | ||||
|         proc.wait() | ||||
|         outs, errs = proc.communicate() | ||||
|         errlines = str(errs).split("\\n") | ||||
|         self.assertEqual(errlines[-2],'xml.etree.ElementTree.ParseError: no element found: line 1369, column 0') | ||||
|         try: | ||||
|             tester.call_wikiq() | ||||
|         except subprocess.CalledProcessError as exc: | ||||
|             errlines = exc.stderr.decode("utf8").splitlines() | ||||
|             self.assertEqual(errlines[-1], want_exception) | ||||
|         else: | ||||
|             self.fail("No exception raised, want: {}".format(want_exception)) | ||||
| 
 | ||||
| class Test_Stdout(unittest.TestCase): | ||||
|     def test_stdout_noargs(self): | ||||
|         tester = WikiqTester(wiki=SAILORMOON, case_name="noargs", in_compression="7z") | ||||
| 
 | ||||
|     def setUp(self): | ||||
|         self.wiki = 'sailormoon' | ||||
|         self.wikiq_out_name =  self.wiki + ".tsv" | ||||
|         try: | ||||
|             outs = tester.call_wikiq("--stdout", "--fandom-2020", out=False).decode("utf8") | ||||
|         except subprocess.CalledProcessError as exc: | ||||
|             self.fail(exc.stderr.decode("utf8")) | ||||
| 
 | ||||
|         self.infile = "{0}.xml.7z".format(self.wiki) | ||||
|         self.base_call = "../wikiq {0} --stdout" | ||||
|         self.input_dir = "dumps" | ||||
|         self.input_file = os.path.join(".", self.input_dir,self.infile) | ||||
|         self.baseline_output_dir = "baseline_output" | ||||
| 
 | ||||
|     def test_noargs(self): | ||||
| 
 | ||||
|         call = self.base_call.format(self.input_file) | ||||
|         proc = subprocess.run(call,stdout=subprocess.PIPE,shell=True) | ||||
|         outs = proc.stdout.decode("utf8") | ||||
| 
 | ||||
|         test_file = "noargs_" + self.wikiq_out_name | ||||
|         baseline_file = os.path.join(".", self.baseline_output_dir, test_file) | ||||
|         print(baseline_file) | ||||
|         test = pd.read_table(StringIO(outs)) | ||||
|         baseline = pd.read_table(baseline_file) | ||||
|         assert_frame_equal(test,baseline) | ||||
|         baseline = pd.read_table(tester.baseline_file) | ||||
|         assert_frame_equal(test, baseline, check_like=True) | ||||
| 
 | ||||
| class Test_Regex(unittest.TestCase): | ||||
|     def test_bad_regex(self): | ||||
|         tester = WikiqTester(wiki=REGEXTEST, case_name="bad_regex") | ||||
| 
 | ||||
|     def setUp(self): | ||||
|         self.wiki = 'regextest' | ||||
|         self.wikiq_out_name = self.wiki + '.tsv' | ||||
|         self.infile = "{0}.xml.bz2".format(self.wiki) | ||||
| 
 | ||||
|         self.input_dir = "dumps" | ||||
|         self.input_file = os.path.join(".", self.input_dir,self.infile) | ||||
| 
 | ||||
|         if not os.path.exists("test_output"): | ||||
|             os.mkdir("test_output") | ||||
| 
 | ||||
|         self.test_output_dir = os.path.join(".", "test_output") | ||||
|         self.call_output = os.path.join(self.test_output_dir, self.wikiq_out_name) | ||||
|         # we have two base calls, one for checking inputs and the other for checking outputs | ||||
|         self.base_call = "../wikiq {0}" | ||||
|         self.base_call_outs = "../wikiq {0} -o {1}" | ||||
| 
 | ||||
|         self.baseline_output_dir = "baseline_output" | ||||
| 
 | ||||
|         # sample inputs for checking that bad inputs get terminated / test_regex_inputs | ||||
|         self.bad_inputs_list = [ | ||||
|             #label is missing             | ||||
|             "-RP '\\b\\d+\\b'",  | ||||
|             #number of reg and number of labels do not match  | ||||
|         # sample arguments for checking that bad arguments get terminated / test_regex_arguments | ||||
|         bad_arguments_list = [ | ||||
|             # label is missing | ||||
|             "-RP '\\b\\d+\\b'", | ||||
|             # number of reg and number of labels do not match | ||||
|             "-RP 'NPO V' -RP THE -RPl testlabel", | ||||
|             #cp but rp label | ||||
|             # cp but rp label | ||||
|             "-CP '(Tamil|Li)' -RPl testlabel", | ||||
|             #regex is missing | ||||
|             # regex is missing | ||||
|             "-CPl testlabel", | ||||
|             "-RP '\\b\\w{3}\\b' -RPl threeletters -CP '\\b\\w{3}\\b'" | ||||
|         ] | ||||
| 
 | ||||
|         # sample inputs for checking the outcomes of good inputs / test_basic_regex | ||||
|         self.good_inputs_list = [ | ||||
|         for arguments in bad_arguments_list: | ||||
|             try: | ||||
|                 tester.call_wikiq("--stdout", arguments, out=False) | ||||
|             except subprocess.CalledProcessError as exc: | ||||
|                 # we want to check that the bad arguments were caught and sys.exit is stopping the code | ||||
|                 print(exc.stderr.decode("utf-8")) | ||||
|             else: | ||||
|                 self.fail("No exception raised, want Exception") | ||||
| 
 | ||||
|     def test_good_regex(self): | ||||
|         # sample arguments for checking the outcomes of good arguments / test_basic_regex | ||||
|         good_arguments_list = [ | ||||
|             "-RP '\\b\\d{3}\\b' -RPl threedigits", | ||||
|             "-RP 'TestCase' -RP 'page' -RPl testcases -RPl page_word", | ||||
|             "-CP 'Chevalier' -CPl chev_com -RP 'welcome to Wikipedia' -RPl wiki_welcome -CP 'Warning' -CPl warning", | ||||
|             "-CP 'WP:EVADE' -CPl wp_evade"          | ||||
|             "-CP 'WP:EVADE' -CPl wp_evade" | ||||
|         ] | ||||
| 
 | ||||
|          | ||||
|         self.cap_inputs_list = [ | ||||
|         for i, arguments in enumerate(good_arguments_list): | ||||
|             tester = WikiqTester(wiki=REGEXTEST, case_name="basic", suffix=str(i)) | ||||
| 
 | ||||
|             try: | ||||
|                 tester.call_wikiq(arguments) | ||||
|             except subprocess.CalledProcessError as exc: | ||||
|                 self.fail(exc.stderr.decode("utf8")) | ||||
| 
 | ||||
|             test = pd.read_table(tester.output) | ||||
| 
 | ||||
|             baseline = pd.read_table(tester.baseline_file) | ||||
|             assert_frame_equal(test, baseline, check_like=True) | ||||
|             print(i) | ||||
| 
 | ||||
|     def test_capturegroup_regex(self): | ||||
|         cap_arguments_list = [ | ||||
|             "-RP 'Li Chevalier' -RPl li_cheval -CP '(?P<letter>\\b[a-zA-Z]{3}\\b)|(?P<number>\\b\\d+\\b)|(?P<cat>\\bcat\\b)' -CPl three", | ||||
|             "-CP '(?P<a>\\bTestCaseA\\b)|(?P<b>\\bTestCaseB\\b)|(?P<c>\\bTestCaseC\\b)|(?P<d>\\bTestCaseD\\b)' -CPl testcase -RP '(?P<npov>npov|NPOV)|(?P<neutral>neutral point of view)' -RPl npov" | ||||
|         ] | ||||
| 
 | ||||
|         for i, arguments in enumerate(cap_arguments_list): | ||||
|             tester = WikiqTester(wiki=REGEXTEST, case_name="capturegroup", suffix=str(i)) | ||||
| 
 | ||||
|             try: | ||||
|                 tester.call_wikiq(arguments) | ||||
|             except subprocess.CalledProcessError as exc: | ||||
|                 self.fail(exc.stderr.decode("utf8")) | ||||
| 
 | ||||
|     def test_regex_inputs(self): | ||||
|         for input in self.bad_inputs_list: | ||||
|             call = self.base_call.format(self.input_file) | ||||
|             call = call + " --stdout " + input | ||||
|             print(call) | ||||
|             proc = subprocess.Popen(call,stdout=subprocess.PIPE,stderr=subprocess.PIPE,shell=True) | ||||
|             stdout,stderr = proc.communicate() | ||||
|             #print(proc.returncode) | ||||
|              | ||||
|             # we want to check that the bad inputs were caught and sys.exit is stopping the code | ||||
|             print(stderr.decode("utf-8")) | ||||
|             self.assertNotEqual(proc.returncode,0) | ||||
|             test = pd.read_table(tester.output) | ||||
| 
 | ||||
|     def test_basic_regex(self): | ||||
|         for i, input in enumerate(self.good_inputs_list): | ||||
|             baseline = pd.read_table(tester.baseline_file) | ||||
|             assert_frame_equal(test, baseline, check_like=True) | ||||
| 
 | ||||
|             test_filename = "basic_{0}_{1}.tsv".format(self.wikiq_out_name[:-4], str(i)) | ||||
|             #print(test_filename) | ||||
|             test_file = os.path.join(self.test_output_dir, test_filename) | ||||
|             if os.path.exists(test_file): | ||||
|                 os.remove(test_file) | ||||
|     def test_parquet(self): | ||||
|         tester = WikiqTester(IKWIKI, "noargs", out_format="parquet") | ||||
| 
 | ||||
|             call = self.base_call_outs.format(self.input_file, self.test_output_dir) | ||||
|             call = call + " " + input | ||||
|             print(call) | ||||
|         try: | ||||
|             tester.call_wikiq() | ||||
|         except subprocess.CalledProcessError as exc: | ||||
|             self.fail(exc.stderr.decode("utf8")) | ||||
| 
 | ||||
|             proc = subprocess.Popen(call,stdout=subprocess.PIPE,stderr=subprocess.PIPE,shell=True) | ||||
|             proc.wait() | ||||
|             copyfile(self.call_output, test_file) | ||||
|         # as a test let's make sure that we get equal data frames | ||||
|         test: DataFrame = pd.read_parquet(tester.output) | ||||
|         # test = test.drop(['reverteds'], axis=1) | ||||
| 
 | ||||
|             test = pd.read_table(test_file) | ||||
|              | ||||
|             baseline_file = os.path.join(".", self.baseline_output_dir, test_filename) | ||||
|             baseline = pd.read_table(baseline_file) | ||||
|             assert_frame_equal(test, baseline) | ||||
|             print(i) | ||||
|         baseline: DataFrame = pd.read_table(tester.baseline_file) | ||||
| 
 | ||||
|         # Pandas does not read timestamps as the desired datetime type. | ||||
|         baseline['date_time'] = pd.to_datetime(baseline['date_time']) | ||||
|         # Split strings to the arrays of reverted IDs so they can be compared. | ||||
|         baseline['revert'] = baseline['revert'].replace(np.nan, None) | ||||
|         baseline['reverteds'] = baseline['reverteds'].replace(np.nan, None) | ||||
|         # baseline['reverteds'] = [None if i is np.nan else [int(j) for j in str(i).split(",")] for i in baseline['reverteds']] | ||||
|         baseline['sha1'] = baseline['sha1'].replace(np.nan, None) | ||||
|         baseline['editor'] = baseline['editor'].replace(np.nan, None) | ||||
|         baseline['anon'] = baseline['anon'].replace(np.nan, None) | ||||
| 
 | ||||
|     def test_capturegroup_regex(self): | ||||
|         for i, input in enumerate(self.cap_inputs_list): | ||||
|             test_filename = "capturegroup_{0}_{1}.tsv".format(self.wikiq_out_name[:-4], str(i)) | ||||
|             print(test_filename) | ||||
|             test_file = os.path.join(self.test_output_dir, test_filename) | ||||
|             if os.path.exists(test_file): | ||||
|                 os.remove(test_file) | ||||
|         for index, row in baseline.iterrows(): | ||||
|             if row['revert'] != test['revert'][index]: | ||||
|                 print(row['revid'], ":", row['revert'], "!=", test['revert'][index]) | ||||
| 
 | ||||
|             call = self.base_call_outs.format(self.input_file, self.test_output_dir) | ||||
|             call = call + " " + input | ||||
|             print(call) | ||||
|         for col in baseline.columns: | ||||
|             try: | ||||
|                 assert_series_equal(test[col], baseline[col], check_like=True, check_dtype=False) | ||||
|             except ValueError as exc: | ||||
|                 print(f"Error comparing column {col}") | ||||
|                 self.fail(exc) | ||||
| 
 | ||||
|             proc = subprocess.Popen(call,stdout=subprocess.PIPE,stderr=subprocess.PIPE,shell=True) | ||||
|             proc.wait() | ||||
| 
 | ||||
|             copyfile(self.call_output, test_file) | ||||
|              | ||||
|             test = pd.read_table(test_file) | ||||
|              | ||||
|             baseline_file = os.path.join(".", self.baseline_output_dir, test_filename) | ||||
|             baseline = pd.read_table(baseline_file) | ||||
|             assert_frame_equal(test, baseline) | ||||
|         # assert_frame_equal(test, baseline, check_like=True, check_dtype=False) | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|  | ||||
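
The new test_parquet case above cannot compare the parquet output against the TSV baseline directly: pandas reads the TSV with string timestamps and NaN for missing values. A condensed sketch of that normalization, with file paths assumed for illustration to follow the WikiqTester naming scheme:

```python
# Sketch only: compare parquet output with a TSV baseline column by column,
# normalizing timestamps and missing values first, as the new test does.
import numpy as np
import pandas as pd
from pandas.testing import assert_series_equal

test = pd.read_parquet("test/test_output/noargs_ikwiki-20180301-pages-meta-history.parquet")
baseline = pd.read_table("test/baseline_output/noargs_ikwiki-20180301-pages-meta-history.tsv")

baseline["date_time"] = pd.to_datetime(baseline["date_time"])
for col in ("revert", "reverteds", "sha1", "editor", "anon"):
    baseline[col] = baseline[col].replace(np.nan, None)

for col in baseline.columns:
    assert_series_equal(test[col], baseline[col], check_dtype=False)
```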
							
								
								
									
test/__init__.py (Normal file): 0 changed lines

							| @ -1,27 +1,27 @@ | ||||
| anon	articleid	date_time	deleted	editor	editor_id	minor	namespace	revert	reverteds	revid	sha1	text_chars	threedigits	title | ||||
| FALSE	56237363	2018-01-07 10:40:58	FALSE	"NinjaRobotPirate"	3742946	FALSE	3	FALSE		819091731	135nz8q6lfam6cojla7azb7k5alx3t3	1141	126, 126, 126, 126	"User talk:86.139.142.254" | ||||
| FALSE	56237364	2018-01-07 10:41:10	FALSE	"Kavin kavitha"	32792125	FALSE	3	FALSE		819091755	0pwezjc6yopz0smc8al6ogc4fax5bwo	663	None	"User talk:Kavin kavitha" | ||||
| FALSE	56237365	2018-01-07 10:41:26	FALSE	"Amicable always"	32621254	FALSE	3	FALSE		819091788	sz3t2ap7z8bpkdvdvi195f3i35949bv	399	None	"User talk:Dr.vivek163" | ||||
| FALSE	56237366	2018-01-07 10:41:31	FALSE	"ClueBot NG"	13286072	FALSE	3	FALSE		819091796	r6s5j8j3iykenrhuhpnkpsmmd71vubf	1260	None	"User talk:Twistorl" | ||||
| FALSE	56237368	2018-01-07 10:41:51	FALSE	"Khruner"	8409334	FALSE	0	FALSE		819091825	tf5qz2yaswx61zrlm9ovxzuhl7r2dc4	2249	119, 978, 500, 292, 225, 199, 292	"Kom Firin" | ||||
| FALSE	56237368	2018-01-27 12:16:02	FALSE	"Khruner"	8409334	TRUE	0	FALSE		822610647	e6oa4g0qv64icdaq26uu1zzbyr5hcbh	2230	119, 978, 500, 292, 225, 199, 292	"Kom Firin" | ||||
| FALSE	56237369	2018-01-07 10:42:05	FALSE	"Editingaccount1994"	32794215	FALSE	2	FALSE		819091844	0fyvyh2a8xu41gt8obr34oba0bfixj6	27840	798, 150, 150, 150, 621, 137, 137, 150, 150, 350, 195, 350, 195, 180, 180, 350, 195, 300, 150, 150, 150, 180, 180, 621	"User:Editingaccount1994/sandbox" | ||||
| FALSE	56237369	2018-01-07 11:09:52	FALSE	"AnomieBOT"	7611264	TRUE	2	FALSE		819093984	8gy52aolt5rg3eaketwj5v7eiw0apv2	27787	798, 150, 150, 150, 621, 137, 137, 150, 150, 350, 195, 350, 195, 180, 180, 350, 195, 300, 150, 150, 150, 180, 180, 621	"User:Editingaccount1994/sandbox" | ||||
| FALSE	56237369	2018-01-12 21:45:50	FALSE	"SporkBot"	12406635	TRUE	2	FALSE		820064189	he8ydemaanxlrpftqxkez8jfpge1fsj	27784	798, 150, 150, 150, 621, 137, 137, 150, 150, 350, 195, 350, 195, 180, 180, 350, 195, 300, 150, 150, 150, 180, 180, 621	"User:Editingaccount1994/sandbox" | ||||
| FALSE	56237369	2018-01-12 23:28:11	FALSE	"SporkBot"	12406635	TRUE	2	FALSE		820078679	0to17w9rth3url8n7gvucdtobybdq5h	27783	798, 150, 150, 150, 621, 137, 137, 150, 150, 350, 195, 350, 195, 180, 180, 350, 195, 300, 150, 150, 150, 180, 180, 621	"User:Editingaccount1994/sandbox" | ||||
| FALSE	56237369	2018-01-12 23:28:39	FALSE	"SporkBot"	12406635	TRUE	2	FALSE		820078733	531dizmmloyxffbkdr5vph7owh921eg	27782	798, 150, 150, 150, 621, 137, 137, 150, 150, 350, 195, 350, 195, 180, 180, 350, 195, 300, 150, 150, 150, 180, 180, 621	"User:Editingaccount1994/sandbox" | ||||
| FALSE	56237369	2018-01-13 13:45:33	FALSE	"Frietjes"	13791031	FALSE	2	FALSE		820177382	nik9p2u2fuk4yazjxt8ymbicxv5qid9	27757	798, 150, 150, 150, 621, 100, 621	"User:Editingaccount1994/sandbox" | ||||
| FALSE	56237369	2018-01-24 01:35:22	FALSE	"CommonsDelinker"	2304267	FALSE	2	FALSE		822038928	gwk6pampl8si1v5pv3kwgteg710sfw3	27667	798, 150, 150, 150, 621, 100, 621	"User:Editingaccount1994/sandbox" | ||||
| FALSE	56237370	2018-01-07 10:42:20	FALSE	"PamD"	1368779	FALSE	0	FALSE		819091874	n4ozbsgle13p9yywtfrz982ccj8woc9	25	None	"Anita del Rey" | ||||
| FALSE	56237371	2018-01-07 10:42:27	FALSE	"ClueBot NG"	13286072	FALSE	3	FALSE		819091883	ksohnvsbeuzwpl5vb8a3v8m18hva0a7	1274	119, 157, 119, 157, 119, 157, 119, 157	"User talk:119.94.96.157" | ||||
| FALSE	56237372	2018-01-07 10:42:50	FALSE	"Underbar dk"	677153	FALSE	14	FALSE		819091914	je7aw21fedbwyqsyofpisdrynsu7olr	113	None	"Category:Ohmi Railway" | ||||
| FALSE	56237375	2018-01-07 10:43:32	FALSE	"TastyPoutine"	882433	FALSE	3	FALSE		819091968	cpm4tkzcx4hc6irr9ukbi06ogud8dtq	199	None	"User talk:92.226.219.222" | ||||
| FALSE	56237375	2018-01-07 11:10:24	FALSE	"AnomieBOT"	7611264	TRUE	3	FALSE		819094036	artmfz8b2gxhb3pp8a5p4ksplxqfkpg	1840	None	"User talk:92.226.219.222" | ||||
| FALSE	56237375	2018-01-07 14:33:36	FALSE	"Only"	702940	FALSE	3	FALSE		819112363	dn9wj0n8d8pdd5lqe56uw5xamupowr1	2949	126, 126, 126, 126	"User talk:92.226.219.222" | ||||
| FALSE	56237376	2018-01-07 10:44:01	FALSE	"Dipayanacharya"	32794237	FALSE	2	FALSE		819092004	ofueugwatmmn7u73isw732neuza57gk	28	None	"User:Dipayanacharya" | ||||
| FALSE	56237376	2018-01-07 10:49:08	FALSE	"Dipayanacharya"	32794237	FALSE	2	FALSE		819092390	dsz55xv96ec2uv6w9c1z7c52ipfovbw	38	None	"User:Dipayanacharya" | ||||
| FALSE	56237378	2018-01-07 10:44:56	FALSE	"Vinegarymass911"	21516552	FALSE	0	FALSE		819092066	9ma38hak0ef1ew4fpiutxpnzd8oz1wd	65	None	"BSCIC" | ||||
| FALSE	56237379	2018-01-07 10:45:21	FALSE	"BrownHairedGirl"	754619	FALSE	14	FALSE		819092102	4dvakoat58bzyf5hmtthxukt29hip6n	285	None	"Category:Women government ministers of Yemen" | ||||
| FALSE	56237381	2018-01-07 10:45:54	FALSE	"PRehse"	410898	FALSE	1	FALSE		819092135	2sjrxsc7os9k9pg4su2t4rk2j8nn0h7	103	None	"Talk:List of Morning Glories Characters" | ||||
| FALSE	56237382	2018-01-07 10:45:56	FALSE	"ClueBot NG"	13286072	FALSE	3	FALSE		819092138	3y9t5wpk6ur5jhone75rhm4wjf01fgi	1330	106, 207, 126, 114, 106, 207, 126, 114, 106, 207, 126, 114, 106, 207, 126, 114	"User talk:106.207.126.114" | ||||
| FALSE	56237382	2018-01-07 10:50:22	FALSE	"HindWIKI"	31190506	FALSE	3	FALSE		819092495	8wvn6vh3isyt0dorpe89lztrburgupe	2355	106, 207, 126, 114, 106, 207, 126, 114, 106, 207, 126, 114, 106, 207, 126, 114	"User talk:106.207.126.114" | ||||
| "revid"	"date_time"	"articleid"	"title"	"namespace"	"deleted"	"editorid"	"edit_summary"	"text_chars"	"reverteds"	"sha1"	"minor"	"editor"	"anon"	"revert"	"threedigits" | ||||
| 819091731	2018-01-07 10:40:58	56237363	"User talk:86.139.142.254"	3	false	3742946	"Your IP address has been blocked from editing because it has been used to [[WP:EVADE|evade a previous block]]. ([[WP:TW|TW]])"	1141		"135nz8q6lfam6cojla7azb7k5alx3t3"	false	"NinjaRobotPirate"	false	false	"126, 126, 126, 126" | ||||
| 819091755	2018-01-07 10:41:10	56237364	"User talk:Kavin kavitha"	3	false	32792125	"[[WP:AES|←]]Created page with ''''''Kavin (Tamil. கவின்) is a masculine given name, which is Tamil for ""beauty"", ""grace"", ""fairness"" or ""comeliness""Kavin is born on 01 /12/2001 at Sa...'"	663		"0pwezjc6yopz0smc8al6ogc4fax5bwo"	false	"Kavin kavitha"	false	false	 | ||||
| 819091788	2018-01-07 10:41:26	56237365	"User talk:Dr.vivek163"	3	false	32621254	"/* Regarding Merger discussion */ new section"	399		"sz3t2ap7z8bpkdvdvi195f3i35949bv"	false	"Amicable always"	false	false	 | ||||
| 819091796	2018-01-07 10:41:31	56237366	"User talk:Twistorl"	3	false	13286072	"Warning [[Special:Contributions/Twistorl|Twistorl]] - #1"	1260		"r6s5j8j3iykenrhuhpnkpsmmd71vubf"	false	"ClueBot NG"	false	false	 | ||||
| 819091825	2018-01-07 10:41:51	56237368	"Kom Firin"	0	false	8409334	"[[WP:AES|←]]Created page with '[[File:Stele 67.119 Brooklyn.jpg|thumb|Stele of the [[Libu#Great Chiefs of the Libu|Chief of the Libu]] Titaru, a contemporary of pharaoh [[Shoshenq V]] of the [...'TestCaseB and you're a Tor node "	2249		"tf5qz2yaswx61zrlm9ovxzuhl7r2dc4"	false	"Khruner"	false	false	"119, 978, 500, 292, 225, 199, 292" | ||||
| 822610647	2018-01-27 12:16:02	56237368	"Kom Firin"	0	false	8409334	"/* History */ typo"	2230		"e6oa4g0qv64icdaq26uu1zzbyr5hcbh"	true	"Khruner"	false	false	"119, 978, 500, 292, 225, 199, 292" | ||||
| 819091844	2018-01-07 10:42:05	56237369	"User:Editingaccount1994/sandbox"	2	false	32794215	"[[WP:AES|←]]Created page with '{{User sandbox}} <!-- EDIT BELOW THIS LINE --> {{voir homonymes|Chevalier}} {{Infobox Artiste  | nom                = Li Chevalier  | autres noms        =   | im...'"	27840		"0fyvyh2a8xu41gt8obr34oba0bfixj6"	false	"Editingaccount1994"	false	false	"798, 150, 150, 150, 621, 137, 137, 150, 150, 350, 195, 350, 195, 180, 180, 350, 195, 300, 150, 150, 150, 180, 180, 621" | ||||
| 819093984	2018-01-07 11:09:52	56237369	"User:Editingaccount1994/sandbox"	2	false	7611264	"[[User:AnomieBOT/docs/TemplateSubster|Substing templates]]: {{Lien web}}. See [[User:AnomieBOT/docs/TemplateSubster]] for info."	27787		"8gy52aolt5rg3eaketwj5v7eiw0apv2"	true	"AnomieBOT"	false	false	"798, 150, 150, 150, 621, 137, 137, 150, 150, 350, 195, 350, 195, 180, 180, 350, 195, 300, 150, 150, 150, 180, 180, 621" | ||||
| 820064189	2018-01-12 21:45:50	56237369	"User:Editingaccount1994/sandbox"	2	false	12406635	"Orphan per [[WP:TFD|TFD outcome]]"	27784		"he8ydemaanxlrpftqxkez8jfpge1fsj"	true	"SporkBot"	false	false	"798, 150, 150, 150, 621, 137, 137, 150, 150, 350, 195, 350, 195, 180, 180, 350, 195, 300, 150, 150, 150, 180, 180, 621" | ||||
| 820078679	2018-01-12 23:28:11	56237369	"User:Editingaccount1994/sandbox"	2	false	12406635	"Replace template per [[Wikipedia:Templates for discussion/Log/2010 June 13|TFD outcome]]; no change in content"	27783		"0to17w9rth3url8n7gvucdtobybdq5h"	true	"SporkBot"	false	false	"798, 150, 150, 150, 621, 137, 137, 150, 150, 350, 195, 350, 195, 180, 180, 350, 195, 300, 150, 150, 150, 180, 180, 621" | ||||
| 820078733	2018-01-12 23:28:39	56237369	"User:Editingaccount1994/sandbox"	2	false	12406635	"Replace template per [[Wikipedia:Templates for discussion/Log/2011 February 17|TFD outcome]]; no change in content"	27782		"531dizmmloyxffbkdr5vph7owh921eg"	true	"SporkBot"	false	false	"798, 150, 150, 150, 621, 137, 137, 150, 150, 350, 195, 350, 195, 180, 180, 350, 195, 300, 150, 150, 150, 180, 180, 621" | ||||
| 820177382	2018-01-13 13:45:33	56237369	"User:Editingaccount1994/sandbox"	2	false	13791031	"translate TestCaseD if you are from tor you need neutral point of view "	27757		"nik9p2u2fuk4yazjxt8ymbicxv5qid9"	false	"Frietjes"	false	false	"798, 150, 150, 150, 621, 100, 621" | ||||
| 822038928	2018-01-24 01:35:22	56237369	"User:Editingaccount1994/sandbox"	2	false	2304267	"Removing [[:c:File:Li_Chevalier_Art_Studio.jpg|Li_Chevalier_Art_Studio.jpg]], it has been deleted from Commons by [[:c:User:JuTa|JuTa]] because: [[:c:COM:OTRS|No permission]] since 16 January 2018."	27667		"gwk6pampl8si1v5pv3kwgteg710sfw3"	false	"CommonsDelinker"	false	false	"798, 150, 150, 150, 621, 100, 621" | ||||
| 819091874	2018-01-07 10:42:20	56237370	"Anita del Rey"	0	false	1368779	"r from alt name"	25		"n4ozbsgle13p9yywtfrz982ccj8woc9"	false	"PamD"	false	false	 | ||||
| 819091883	2018-01-07 10:42:27	56237371	"User talk:119.94.96.157"	3	false	13286072	"Warning [[Special:Contributions/119.94.96.157|119.94.96.157]] - #1"	1274		"ksohnvsbeuzwpl5vb8a3v8m18hva0a7"	false	"ClueBot NG"	false	false	"119, 157, 119, 157, 119, 157, 119, 157" | ||||
| 819091914	2018-01-07 10:42:50	56237372	"Category:Ohmi Railway"	14	false	677153	"[[WP:AES|←]]Created page with ' [[Category:Railway companies of Japan]] [[Category:Rail transport in Shiga Prefecture]] [[Category:Seibu Group]]'"	113		"je7aw21fedbwyqsyofpisdrynsu7olr"	false	"Underbar dk"	false	false	 | ||||
| 819091968	2018-01-07 10:43:32	56237375	"User talk:92.226.219.222"	3	false	882433	"[[WP:AES|←]]Created page with '{{3rr}}~~~~'"	199		"cpm4tkzcx4hc6irr9ukbi06ogud8dtq"	false	"TastyPoutine"	false	false	 | ||||
| 819094036	2018-01-07 11:10:24	56237375	"User talk:92.226.219.222"	3	false	7611264	"[[User:AnomieBOT/docs/TemplateSubster|Substing templates]]: {{3rr}}. See [[User:AnomieBOT/docs/TemplateSubster]] for info."	1840		"artmfz8b2gxhb3pp8a5p4ksplxqfkpg"	true	"AnomieBOT"	false	false	 | ||||
| 819112363	2018-01-07 14:33:36	56237375	"User talk:92.226.219.222"	3	false	702940	"Your IP address has been blocked from editing because it has been used to [[WP:EVADE|evade a previous block]]. ([[WP:TW|TW]])"	2949		"dn9wj0n8d8pdd5lqe56uw5xamupowr1"	false	"Only"	false	false	"126, 126, 126, 126" | ||||
| 819092004	2018-01-07 10:44:01	56237376	"User:Dipayanacharya"	2	false	32794237	"Education"	28		"ofueugwatmmn7u73isw732neuza57gk"	false	"Dipayanacharya"	false	false	 | ||||
| 819092390	2018-01-07 10:49:08	56237376	"User:Dipayanacharya"	2	false	32794237	"School"	38		"dsz55xv96ec2uv6w9c1z7c52ipfovbw"	false	"Dipayanacharya"	false	false	 | ||||
| 819092066	2018-01-07 10:44:56	56237378	"BSCIC"	0	false	21516552	"[[WP:AES|←]]Redirected page to [[Bangladesh Small and Cottage Industries Corporation]]"	65		"9ma38hak0ef1ew4fpiutxpnzd8oz1wd"	false	"Vinegarymass911"	false	false	 | ||||
| 819092102	2018-01-07 10:45:21	56237379	"Category:Women government ministers of Yemen"	14	false	754619	"[[WP:AES|←]]Created page with '{{portal|Yemen|Politics}} {{Non-diffusing subcategory|Government ministers of Yemen}}  {{Underpopulated category}}  Category:Women government ministers by nati...'"	285		"4dvakoat58bzyf5hmtthxukt29hip6n"	false	"BrownHairedGirl"	false	false	 | ||||
| 819092135	2018-01-07 10:45:54	56237381	"Talk:List of Morning Glories Characters"	1	false	410898	"[[WP:AES|←]]Created page with '{{WikiProject Fictional characters|class=List|importance=low}} {{Comicsproj|class=List|importance=low}}'"	103		"2sjrxsc7os9k9pg4su2t4rk2j8nn0h7"	false	"PRehse"	false	false	 | ||||
| 819092138	2018-01-07 10:45:56	56237382	"User talk:106.207.126.114"	3	false	13286072	"Warning [[Special:Contributions/106.207.126.114|106.207.126.114]] - #1"	1330		"3y9t5wpk6ur5jhone75rhm4wjf01fgi"	false	"ClueBot NG"	false	false	"106, 207, 126, 114, 106, 207, 126, 114, 106, 207, 126, 114, 106, 207, 126, 114" | ||||
| 819092495	2018-01-07 10:50:22	56237382	"User talk:106.207.126.114"	3	false	31190506	"Caution: Unconstructive editing on [[List of Baahubali characters]]. ([[WP:TW|TW]])"	2355		"8wvn6vh3isyt0dorpe89lztrburgupe"	false	"HindWIKI"	false	false	"106, 207, 126, 114, 106, 207, 126, 114, 106, 207, 126, 114, 106, 207, 126, 114" | ||||
|  | ||||
| @ -1,27 +1,27 @@ | ||||
| anon	articleid	date_time	deleted	editor	editor_id	minor	namespace	page_word	revert	reverteds	revid	sha1	testcases	text_chars	title | ||||
| FALSE	56237363	2018-01-07 10:40:58	FALSE	"NinjaRobotPirate"	3742946	FALSE	3	page, page	FALSE		819091731	135nz8q6lfam6cojla7azb7k5alx3t3	None	1141	"User talk:86.139.142.254" | ||||
| FALSE	56237364	2018-01-07 10:41:10	FALSE	"Kavin kavitha"	32792125	FALSE	3	None	FALSE		819091755	0pwezjc6yopz0smc8al6ogc4fax5bwo	None	663	"User talk:Kavin kavitha" | ||||
| FALSE	56237365	2018-01-07 10:41:26	FALSE	"Amicable always"	32621254	FALSE	3	None	FALSE		819091788	sz3t2ap7z8bpkdvdvi195f3i35949bv	TestCase, TestCase	399	"User talk:Dr.vivek163" | ||||
| FALSE	56237366	2018-01-07 10:41:31	FALSE	"ClueBot NG"	13286072	FALSE	3	page	FALSE		819091796	r6s5j8j3iykenrhuhpnkpsmmd71vubf	None	1260	"User talk:Twistorl" | ||||
| FALSE	56237368	2018-01-07 10:41:51	FALSE	"Khruner"	8409334	FALSE	0	page	FALSE		819091825	tf5qz2yaswx61zrlm9ovxzuhl7r2dc4	TestCase	2249	"Kom Firin" | ||||
| FALSE	56237368	2018-01-27 12:16:02	FALSE	"Khruner"	8409334	TRUE	0	page	FALSE		822610647	e6oa4g0qv64icdaq26uu1zzbyr5hcbh	None	2230	"Kom Firin" | ||||
| FALSE	56237369	2018-01-07 10:42:05	FALSE	"Editingaccount1994"	32794215	FALSE	2	page, page	FALSE		819091844	0fyvyh2a8xu41gt8obr34oba0bfixj6	None	27840	"User:Editingaccount1994/sandbox" | ||||
| FALSE	56237369	2018-01-07 11:09:52	FALSE	"AnomieBOT"	7611264	TRUE	2	page, page	FALSE		819093984	8gy52aolt5rg3eaketwj5v7eiw0apv2	None	27787	"User:Editingaccount1994/sandbox" | ||||
| FALSE	56237369	2018-01-12 21:45:50	FALSE	"SporkBot"	12406635	TRUE	2	page, page	FALSE		820064189	he8ydemaanxlrpftqxkez8jfpge1fsj	None	27784	"User:Editingaccount1994/sandbox" | ||||
| FALSE	56237369	2018-01-12 23:28:11	FALSE	"SporkBot"	12406635	TRUE	2	page, page	FALSE		820078679	0to17w9rth3url8n7gvucdtobybdq5h	None	27783	"User:Editingaccount1994/sandbox" | ||||
| FALSE	56237369	2018-01-12 23:28:39	FALSE	"SporkBot"	12406635	TRUE	2	page, page	FALSE		820078733	531dizmmloyxffbkdr5vph7owh921eg	None	27782	"User:Editingaccount1994/sandbox" | ||||
| FALSE	56237369	2018-01-13 13:45:33	FALSE	"Frietjes"	13791031	FALSE	2	page, page	FALSE		820177382	nik9p2u2fuk4yazjxt8ymbicxv5qid9	None	27757	"User:Editingaccount1994/sandbox" | ||||
| FALSE	56237369	2018-01-24 01:35:22	FALSE	"CommonsDelinker"	2304267	FALSE	2	page, page	FALSE		822038928	gwk6pampl8si1v5pv3kwgteg710sfw3	None	27667	"User:Editingaccount1994/sandbox" | ||||
| FALSE	56237370	2018-01-07 10:42:20	FALSE	"PamD"	1368779	FALSE	0	None	FALSE		819091874	n4ozbsgle13p9yywtfrz982ccj8woc9	None	25	"Anita del Rey" | ||||
| FALSE	56237371	2018-01-07 10:42:27	FALSE	"ClueBot NG"	13286072	FALSE	3	page	FALSE		819091883	ksohnvsbeuzwpl5vb8a3v8m18hva0a7	None	1274	"User talk:119.94.96.157" | ||||
| FALSE	56237372	2018-01-07 10:42:50	FALSE	"Underbar dk"	677153	FALSE	14	None	FALSE		819091914	je7aw21fedbwyqsyofpisdrynsu7olr	None	113	"Category:Ohmi Railway" | ||||
| FALSE	56237375	2018-01-07 10:43:32	FALSE	"TastyPoutine"	882433	FALSE	3	None	FALSE		819091968	cpm4tkzcx4hc6irr9ukbi06ogud8dtq	None	199	"User talk:92.226.219.222" | ||||
| FALSE	56237375	2018-01-07 11:10:24	FALSE	"AnomieBOT"	7611264	TRUE	3	page, page, page, page	FALSE		819094036	artmfz8b2gxhb3pp8a5p4ksplxqfkpg	None	1840	"User talk:92.226.219.222" | ||||
| FALSE	56237375	2018-01-07 14:33:36	FALSE	"Only"	702940	FALSE	3	page, page, page, page, page, page	FALSE		819112363	dn9wj0n8d8pdd5lqe56uw5xamupowr1	None	2949	"User talk:92.226.219.222" | ||||
| FALSE	56237376	2018-01-07 10:44:01	FALSE	"Dipayanacharya"	32794237	FALSE	2	None	FALSE		819092004	ofueugwatmmn7u73isw732neuza57gk	None	28	"User:Dipayanacharya" | ||||
| FALSE	56237376	2018-01-07 10:49:08	FALSE	"Dipayanacharya"	32794237	FALSE	2	None	FALSE		819092390	dsz55xv96ec2uv6w9c1z7c52ipfovbw	None	38	"User:Dipayanacharya" | ||||
| FALSE	56237378	2018-01-07 10:44:56	FALSE	"Vinegarymass911"	21516552	FALSE	0	None	FALSE		819092066	9ma38hak0ef1ew4fpiutxpnzd8oz1wd	None	65	"BSCIC" | ||||
| FALSE	56237379	2018-01-07 10:45:21	FALSE	"BrownHairedGirl"	754619	FALSE	14	None	FALSE		819092102	4dvakoat58bzyf5hmtthxukt29hip6n	None	285	"Category:Women government ministers of Yemen" | ||||
| FALSE	56237381	2018-01-07 10:45:54	FALSE	"PRehse"	410898	FALSE	1	None	FALSE		819092135	2sjrxsc7os9k9pg4su2t4rk2j8nn0h7	None	103	"Talk:List of Morning Glories Characters" | ||||
| FALSE	56237382	2018-01-07 10:45:56	FALSE	"ClueBot NG"	13286072	FALSE	3	page	FALSE		819092138	3y9t5wpk6ur5jhone75rhm4wjf01fgi	None	1330	"User talk:106.207.126.114" | ||||
| FALSE	56237382	2018-01-07 10:50:22	FALSE	"HindWIKI"	31190506	FALSE	3	page	FALSE		819092495	8wvn6vh3isyt0dorpe89lztrburgupe	None	2355	"User talk:106.207.126.114" | ||||
| "revid"	"date_time"	"articleid"	"title"	"namespace"	"deleted"	"editorid"	"edit_summary"	"text_chars"	"reverteds"	"sha1"	"minor"	"editor"	"anon"	"revert"	"testcases"	"page_word" | ||||
| 819091731	2018-01-07 10:40:58	56237363	"User talk:86.139.142.254"	3	false	3742946	"Your IP address has been blocked from editing because it has been used to [[WP:EVADE|evade a previous block]]. ([[WP:TW|TW]])"	1141		"135nz8q6lfam6cojla7azb7k5alx3t3"	false	"NinjaRobotPirate"	false	false		"page, page" | ||||
| 819091755	2018-01-07 10:41:10	56237364	"User talk:Kavin kavitha"	3	false	32792125	"[[WP:AES|←]]Created page with ''''''Kavin (Tamil. கவின்) is a masculine given name, which is Tamil for ""beauty"", ""grace"", ""fairness"" or ""comeliness""Kavin is born on 01 /12/2001 at Sa...'"	663		"0pwezjc6yopz0smc8al6ogc4fax5bwo"	false	"Kavin kavitha"	false	false		 | ||||
| 819091788	2018-01-07 10:41:26	56237365	"User talk:Dr.vivek163"	3	false	32621254	"/* Regarding Merger discussion */ new section"	399		"sz3t2ap7z8bpkdvdvi195f3i35949bv"	false	"Amicable always"	false	false	"TestCase, TestCase"	 | ||||
| 819091796	2018-01-07 10:41:31	56237366	"User talk:Twistorl"	3	false	13286072	"Warning [[Special:Contributions/Twistorl|Twistorl]] - #1"	1260		"r6s5j8j3iykenrhuhpnkpsmmd71vubf"	false	"ClueBot NG"	false	false		"page" | ||||
| 819091825	2018-01-07 10:41:51	56237368	"Kom Firin"	0	false	8409334	"[[WP:AES|←]]Created page with '[[File:Stele 67.119 Brooklyn.jpg|thumb|Stele of the [[Libu#Great Chiefs of the Libu|Chief of the Libu]] Titaru, a contemporary of pharaoh [[Shoshenq V]] of the [...'TestCaseB and you're a Tor node "	2249		"tf5qz2yaswx61zrlm9ovxzuhl7r2dc4"	false	"Khruner"	false	false	"TestCase"	"page" | ||||
| 822610647	2018-01-27 12:16:02	56237368	"Kom Firin"	0	false	8409334	"/* History */ typo"	2230		"e6oa4g0qv64icdaq26uu1zzbyr5hcbh"	true	"Khruner"	false	false		"page" | ||||
| 819091844	2018-01-07 10:42:05	56237369	"User:Editingaccount1994/sandbox"	2	false	32794215	"[[WP:AES|←]]Created page with '{{User sandbox}} <!-- EDIT BELOW THIS LINE --> {{voir homonymes|Chevalier}} {{Infobox Artiste  | nom                = Li Chevalier  | autres noms        =   | im...'"	27840		"0fyvyh2a8xu41gt8obr34oba0bfixj6"	false	"Editingaccount1994"	false	false		"page, page" | ||||
| 819093984	2018-01-07 11:09:52	56237369	"User:Editingaccount1994/sandbox"	2	false	7611264	"[[User:AnomieBOT/docs/TemplateSubster|Substing templates]]: {{Lien web}}. See [[User:AnomieBOT/docs/TemplateSubster]] for info."	27787		"8gy52aolt5rg3eaketwj5v7eiw0apv2"	true	"AnomieBOT"	false	false		"page, page" | ||||
| 820064189	2018-01-12 21:45:50	56237369	"User:Editingaccount1994/sandbox"	2	false	12406635	"Orphan per [[WP:TFD|TFD outcome]]"	27784		"he8ydemaanxlrpftqxkez8jfpge1fsj"	true	"SporkBot"	false	false		"page, page" | ||||
| 820078679	2018-01-12 23:28:11	56237369	"User:Editingaccount1994/sandbox"	2	false	12406635	"Replace template per [[Wikipedia:Templates for discussion/Log/2010 June 13|TFD outcome]]; no change in content"	27783		"0to17w9rth3url8n7gvucdtobybdq5h"	true	"SporkBot"	false	false		"page, page" | ||||
| 820078733	2018-01-12 23:28:39	56237369	"User:Editingaccount1994/sandbox"	2	false	12406635	"Replace template per [[Wikipedia:Templates for discussion/Log/2011 February 17|TFD outcome]]; no change in content"	27782		"531dizmmloyxffbkdr5vph7owh921eg"	true	"SporkBot"	false	false		"page, page" | ||||
| 820177382	2018-01-13 13:45:33	56237369	"User:Editingaccount1994/sandbox"	2	false	13791031	"translate TestCaseD if you are from tor you need neutral point of view "	27757		"nik9p2u2fuk4yazjxt8ymbicxv5qid9"	false	"Frietjes"	false	false		"page, page" | ||||
| 822038928	2018-01-24 01:35:22	56237369	"User:Editingaccount1994/sandbox"	2	false	2304267	"Removing [[:c:File:Li_Chevalier_Art_Studio.jpg|Li_Chevalier_Art_Studio.jpg]], it has been deleted from Commons by [[:c:User:JuTa|JuTa]] because: [[:c:COM:OTRS|No permission]] since 16 January 2018."	27667		"gwk6pampl8si1v5pv3kwgteg710sfw3"	false	"CommonsDelinker"	false	false		"page, page" | ||||
| 819091874	2018-01-07 10:42:20	56237370	"Anita del Rey"	0	false	1368779	"r from alt name"	25		"n4ozbsgle13p9yywtfrz982ccj8woc9"	false	"PamD"	false	false		 | ||||
| 819091883	2018-01-07 10:42:27	56237371	"User talk:119.94.96.157"	3	false	13286072	"Warning [[Special:Contributions/119.94.96.157|119.94.96.157]] - #1"	1274		"ksohnvsbeuzwpl5vb8a3v8m18hva0a7"	false	"ClueBot NG"	false	false		"page" | ||||
| 819091914	2018-01-07 10:42:50	56237372	"Category:Ohmi Railway"	14	false	677153	"[[WP:AES|←]]Created page with ' [[Category:Railway companies of Japan]] [[Category:Rail transport in Shiga Prefecture]] [[Category:Seibu Group]]'"	113		"je7aw21fedbwyqsyofpisdrynsu7olr"	false	"Underbar dk"	false	false		 | ||||
| 819091968	2018-01-07 10:43:32	56237375	"User talk:92.226.219.222"	3	false	882433	"[[WP:AES|←]]Created page with '{{3rr}}~~~~'"	199		"cpm4tkzcx4hc6irr9ukbi06ogud8dtq"	false	"TastyPoutine"	false	false		 | ||||
| 819094036	2018-01-07 11:10:24	56237375	"User talk:92.226.219.222"	3	false	7611264	"[[User:AnomieBOT/docs/TemplateSubster|Substing templates]]: {{3rr}}. See [[User:AnomieBOT/docs/TemplateSubster]] for info."	1840		"artmfz8b2gxhb3pp8a5p4ksplxqfkpg"	true	"AnomieBOT"	false	false		"page, page, page, page" | ||||
| 819112363	2018-01-07 14:33:36	56237375	"User talk:92.226.219.222"	3	false	702940	"Your IP address has been blocked from editing because it has been used to [[WP:EVADE|evade a previous block]]. ([[WP:TW|TW]])"	2949		"dn9wj0n8d8pdd5lqe56uw5xamupowr1"	false	"Only"	false	false		"page, page, page, page, page, page" | ||||
| 819092004	2018-01-07 10:44:01	56237376	"User:Dipayanacharya"	2	false	32794237	"Education"	28		"ofueugwatmmn7u73isw732neuza57gk"	false	"Dipayanacharya"	false	false		 | ||||
| 819092390	2018-01-07 10:49:08	56237376	"User:Dipayanacharya"	2	false	32794237	"School"	38		"dsz55xv96ec2uv6w9c1z7c52ipfovbw"	false	"Dipayanacharya"	false	false		 | ||||
| 819092066	2018-01-07 10:44:56	56237378	"BSCIC"	0	false	21516552	"[[WP:AES|←]]Redirected page to [[Bangladesh Small and Cottage Industries Corporation]]"	65		"9ma38hak0ef1ew4fpiutxpnzd8oz1wd"	false	"Vinegarymass911"	false	false		 | ||||
| 819092102	2018-01-07 10:45:21	56237379	"Category:Women government ministers of Yemen"	14	false	754619	"[[WP:AES|←]]Created page with '{{portal|Yemen|Politics}} {{Non-diffusing subcategory|Government ministers of Yemen}}  {{Underpopulated category}}  Category:Women government ministers by nati...'"	285		"4dvakoat58bzyf5hmtthxukt29hip6n"	false	"BrownHairedGirl"	false	false		 | ||||
| 819092135	2018-01-07 10:45:54	56237381	"Talk:List of Morning Glories Characters"	1	false	410898	"[[WP:AES|←]]Created page with '{{WikiProject Fictional characters|class=List|importance=low}} {{Comicsproj|class=List|importance=low}}'"	103		"2sjrxsc7os9k9pg4su2t4rk2j8nn0h7"	false	"PRehse"	false	false		 | ||||
| 819092138	2018-01-07 10:45:56	56237382	"User talk:106.207.126.114"	3	false	13286072	"Warning [[Special:Contributions/106.207.126.114|106.207.126.114]] - #1"	1330		"3y9t5wpk6ur5jhone75rhm4wjf01fgi"	false	"ClueBot NG"	false	false		"page" | ||||
| 819092495	2018-01-07 10:50:22	56237382	"User talk:106.207.126.114"	3	false	31190506	"Caution: Unconstructive editing on [[List of Baahubali characters]]. ([[WP:TW|TW]])"	2355		"8wvn6vh3isyt0dorpe89lztrburgupe"	false	"HindWIKI"	false	false		"page" | ||||
|  | ||||
| @ -1,27 +1,27 @@ | ||||
| anon	articleid	chev_com	date_time	deleted	editor	editor_id	minor	namespace	revert	reverteds	revid	sha1	text_chars	title	warning	wiki_welcome | ||||
| FALSE	56237363	None	2018-01-07 10:40:58	FALSE	"NinjaRobotPirate"	3742946	FALSE	3	FALSE		819091731	135nz8q6lfam6cojla7azb7k5alx3t3	1141	"User talk:86.139.142.254"	None	None | ||||
| FALSE	56237364	None	2018-01-07 10:41:10	FALSE	"Kavin kavitha"	32792125	FALSE	3	FALSE		819091755	0pwezjc6yopz0smc8al6ogc4fax5bwo	663	"User talk:Kavin kavitha"	None	None | ||||
| FALSE	56237365	None	2018-01-07 10:41:26	FALSE	"Amicable always"	32621254	FALSE	3	FALSE		819091788	sz3t2ap7z8bpkdvdvi195f3i35949bv	399	"User talk:Dr.vivek163"	None	None | ||||
| FALSE	56237366	None	2018-01-07 10:41:31	FALSE	"ClueBot NG"	13286072	FALSE	3	FALSE		819091796	r6s5j8j3iykenrhuhpnkpsmmd71vubf	1260	"User talk:Twistorl"	Warning	welcome to Wikipedia | ||||
| FALSE	56237368	None	2018-01-07 10:41:51	FALSE	"Khruner"	8409334	FALSE	0	FALSE		819091825	tf5qz2yaswx61zrlm9ovxzuhl7r2dc4	2249	"Kom Firin"	None	None | ||||
| FALSE	56237368	None	2018-01-27 12:16:02	FALSE	"Khruner"	8409334	TRUE	0	FALSE		822610647	e6oa4g0qv64icdaq26uu1zzbyr5hcbh	2230	"Kom Firin"	None	None | ||||
| FALSE	56237369	Chevalier, Chevalier	2018-01-07 10:42:05	FALSE	"Editingaccount1994"	32794215	FALSE	2	FALSE		819091844	0fyvyh2a8xu41gt8obr34oba0bfixj6	27840	"User:Editingaccount1994/sandbox"	None	None | ||||
| FALSE	56237369	None	2018-01-07 11:09:52	FALSE	"AnomieBOT"	7611264	TRUE	2	FALSE		819093984	8gy52aolt5rg3eaketwj5v7eiw0apv2	27787	"User:Editingaccount1994/sandbox"	None	None | ||||
| FALSE	56237369	None	2018-01-12 21:45:50	FALSE	"SporkBot"	12406635	TRUE	2	FALSE		820064189	he8ydemaanxlrpftqxkez8jfpge1fsj	27784	"User:Editingaccount1994/sandbox"	None	None | ||||
| FALSE	56237369	None	2018-01-12 23:28:11	FALSE	"SporkBot"	12406635	TRUE	2	FALSE		820078679	0to17w9rth3url8n7gvucdtobybdq5h	27783	"User:Editingaccount1994/sandbox"	None	None | ||||
| FALSE	56237369	None	2018-01-12 23:28:39	FALSE	"SporkBot"	12406635	TRUE	2	FALSE		820078733	531dizmmloyxffbkdr5vph7owh921eg	27782	"User:Editingaccount1994/sandbox"	None	None | ||||
| FALSE	56237369	None	2018-01-13 13:45:33	FALSE	"Frietjes"	13791031	FALSE	2	FALSE		820177382	nik9p2u2fuk4yazjxt8ymbicxv5qid9	27757	"User:Editingaccount1994/sandbox"	None	None | ||||
| FALSE	56237369	Chevalier, Chevalier	2018-01-24 01:35:22	FALSE	"CommonsDelinker"	2304267	FALSE	2	FALSE		822038928	gwk6pampl8si1v5pv3kwgteg710sfw3	27667	"User:Editingaccount1994/sandbox"	None	None | ||||
| FALSE	56237370	None	2018-01-07 10:42:20	FALSE	"PamD"	1368779	FALSE	0	FALSE		819091874	n4ozbsgle13p9yywtfrz982ccj8woc9	25	"Anita del Rey"	None	None | ||||
| FALSE	56237371	None	2018-01-07 10:42:27	FALSE	"ClueBot NG"	13286072	FALSE	3	FALSE		819091883	ksohnvsbeuzwpl5vb8a3v8m18hva0a7	1274	"User talk:119.94.96.157"	Warning	welcome to Wikipedia | ||||
| FALSE	56237372	None	2018-01-07 10:42:50	FALSE	"Underbar dk"	677153	FALSE	14	FALSE		819091914	je7aw21fedbwyqsyofpisdrynsu7olr	113	"Category:Ohmi Railway"	None	None | ||||
| FALSE	56237375	None	2018-01-07 10:43:32	FALSE	"TastyPoutine"	882433	FALSE	3	FALSE		819091968	cpm4tkzcx4hc6irr9ukbi06ogud8dtq	199	"User talk:92.226.219.222"	None	None | ||||
| FALSE	56237375	None	2018-01-07 11:10:24	FALSE	"AnomieBOT"	7611264	TRUE	3	FALSE		819094036	artmfz8b2gxhb3pp8a5p4ksplxqfkpg	1840	"User talk:92.226.219.222"	None	None | ||||
| FALSE	56237375	None	2018-01-07 14:33:36	FALSE	"Only"	702940	FALSE	3	FALSE		819112363	dn9wj0n8d8pdd5lqe56uw5xamupowr1	2949	"User talk:92.226.219.222"	None	None | ||||
| FALSE	56237376	None	2018-01-07 10:44:01	FALSE	"Dipayanacharya"	32794237	FALSE	2	FALSE		819092004	ofueugwatmmn7u73isw732neuza57gk	28	"User:Dipayanacharya"	None	None | ||||
| FALSE	56237376	None	2018-01-07 10:49:08	FALSE	"Dipayanacharya"	32794237	FALSE	2	FALSE		819092390	dsz55xv96ec2uv6w9c1z7c52ipfovbw	38	"User:Dipayanacharya"	None	None | ||||
| FALSE	56237378	None	2018-01-07 10:44:56	FALSE	"Vinegarymass911"	21516552	FALSE	0	FALSE		819092066	9ma38hak0ef1ew4fpiutxpnzd8oz1wd	65	"BSCIC"	None	None | ||||
| FALSE	56237379	None	2018-01-07 10:45:21	FALSE	"BrownHairedGirl"	754619	FALSE	14	FALSE		819092102	4dvakoat58bzyf5hmtthxukt29hip6n	285	"Category:Women government ministers of Yemen"	None	None | ||||
| FALSE	56237381	None	2018-01-07 10:45:54	FALSE	"PRehse"	410898	FALSE	1	FALSE		819092135	2sjrxsc7os9k9pg4su2t4rk2j8nn0h7	103	"Talk:List of Morning Glories Characters"	None	None | ||||
| FALSE	56237382	None	2018-01-07 10:45:56	FALSE	"ClueBot NG"	13286072	FALSE	3	FALSE		819092138	3y9t5wpk6ur5jhone75rhm4wjf01fgi	1330	"User talk:106.207.126.114"	Warning	welcome to Wikipedia | ||||
| FALSE	56237382	None	2018-01-07 10:50:22	FALSE	"HindWIKI"	31190506	FALSE	3	FALSE		819092495	8wvn6vh3isyt0dorpe89lztrburgupe	2355	"User talk:106.207.126.114"	None	welcome to Wikipedia | ||||
| "revid"	"date_time"	"articleid"	"title"	"namespace"	"deleted"	"editorid"	"edit_summary"	"text_chars"	"reverteds"	"sha1"	"minor"	"editor"	"anon"	"revert"	"wiki_welcome"	"chev_com"	"warning" | ||||
| 819091731	2018-01-07 10:40:58	56237363	"User talk:86.139.142.254"	3	false	3742946	"Your IP address has been blocked from editing because it has been used to [[WP:EVADE|evade a previous block]]. ([[WP:TW|TW]])"	1141		"135nz8q6lfam6cojla7azb7k5alx3t3"	false	"NinjaRobotPirate"	false	false			 | ||||
| 819091755	2018-01-07 10:41:10	56237364	"User talk:Kavin kavitha"	3	false	32792125	"[[WP:AES|←]]Created page with ''''''Kavin (Tamil. கவின்) is a masculine given name, which is Tamil for ""beauty"", ""grace"", ""fairness"" or ""comeliness""Kavin is born on 01 /12/2001 at Sa...'"	663		"0pwezjc6yopz0smc8al6ogc4fax5bwo"	false	"Kavin kavitha"	false	false			 | ||||
| 819091788	2018-01-07 10:41:26	56237365	"User talk:Dr.vivek163"	3	false	32621254	"/* Regarding Merger discussion */ new section"	399		"sz3t2ap7z8bpkdvdvi195f3i35949bv"	false	"Amicable always"	false	false			 | ||||
| 819091796	2018-01-07 10:41:31	56237366	"User talk:Twistorl"	3	false	13286072	"Warning [[Special:Contributions/Twistorl|Twistorl]] - #1"	1260		"r6s5j8j3iykenrhuhpnkpsmmd71vubf"	false	"ClueBot NG"	false	false	"welcome to Wikipedia"		"Warning" | ||||
| 819091825	2018-01-07 10:41:51	56237368	"Kom Firin"	0	false	8409334	"[[WP:AES|←]]Created page with '[[File:Stele 67.119 Brooklyn.jpg|thumb|Stele of the [[Libu#Great Chiefs of the Libu|Chief of the Libu]] Titaru, a contemporary of pharaoh [[Shoshenq V]] of the [...'TestCaseB and you're a Tor node "	2249		"tf5qz2yaswx61zrlm9ovxzuhl7r2dc4"	false	"Khruner"	false	false			 | ||||
| 822610647	2018-01-27 12:16:02	56237368	"Kom Firin"	0	false	8409334	"/* History */ typo"	2230		"e6oa4g0qv64icdaq26uu1zzbyr5hcbh"	true	"Khruner"	false	false			 | ||||
| 819091844	2018-01-07 10:42:05	56237369	"User:Editingaccount1994/sandbox"	2	false	32794215	"[[WP:AES|←]]Created page with '{{User sandbox}} <!-- EDIT BELOW THIS LINE --> {{voir homonymes|Chevalier}} {{Infobox Artiste  | nom                = Li Chevalier  | autres noms        =   | im...'"	27840		"0fyvyh2a8xu41gt8obr34oba0bfixj6"	false	"Editingaccount1994"	false	false		"Chevalier, Chevalier"	 | ||||
| 819093984	2018-01-07 11:09:52	56237369	"User:Editingaccount1994/sandbox"	2	false	7611264	"[[User:AnomieBOT/docs/TemplateSubster|Substing templates]]: {{Lien web}}. See [[User:AnomieBOT/docs/TemplateSubster]] for info."	27787		"8gy52aolt5rg3eaketwj5v7eiw0apv2"	true	"AnomieBOT"	false	false			 | ||||
| 820064189	2018-01-12 21:45:50	56237369	"User:Editingaccount1994/sandbox"	2	false	12406635	"Orphan per [[WP:TFD|TFD outcome]]"	27784		"he8ydemaanxlrpftqxkez8jfpge1fsj"	true	"SporkBot"	false	false			 | ||||
| 820078679	2018-01-12 23:28:11	56237369	"User:Editingaccount1994/sandbox"	2	false	12406635	"Replace template per [[Wikipedia:Templates for discussion/Log/2010 June 13|TFD outcome]]; no change in content"	27783		"0to17w9rth3url8n7gvucdtobybdq5h"	true	"SporkBot"	false	false			 | ||||
| 820078733	2018-01-12 23:28:39	56237369	"User:Editingaccount1994/sandbox"	2	false	12406635	"Replace template per [[Wikipedia:Templates for discussion/Log/2011 February 17|TFD outcome]]; no change in content"	27782		"531dizmmloyxffbkdr5vph7owh921eg"	true	"SporkBot"	false	false			 | ||||
| 820177382	2018-01-13 13:45:33	56237369	"User:Editingaccount1994/sandbox"	2	false	13791031	"translate TestCaseD if you are from tor you need neutral point of view "	27757		"nik9p2u2fuk4yazjxt8ymbicxv5qid9"	false	"Frietjes"	false	false			 | ||||
| 822038928	2018-01-24 01:35:22	56237369	"User:Editingaccount1994/sandbox"	2	false	2304267	"Removing [[:c:File:Li_Chevalier_Art_Studio.jpg|Li_Chevalier_Art_Studio.jpg]], it has been deleted from Commons by [[:c:User:JuTa|JuTa]] because: [[:c:COM:OTRS|No permission]] since 16 January 2018."	27667		"gwk6pampl8si1v5pv3kwgteg710sfw3"	false	"CommonsDelinker"	false	false		"Chevalier, Chevalier"	 | ||||
| 819091874	2018-01-07 10:42:20	56237370	"Anita del Rey"	0	false	1368779	"r from alt name"	25		"n4ozbsgle13p9yywtfrz982ccj8woc9"	false	"PamD"	false	false			 | ||||
| 819091883	2018-01-07 10:42:27	56237371	"User talk:119.94.96.157"	3	false	13286072	"Warning [[Special:Contributions/119.94.96.157|119.94.96.157]] - #1"	1274		"ksohnvsbeuzwpl5vb8a3v8m18hva0a7"	false	"ClueBot NG"	false	false	"welcome to Wikipedia"		"Warning" | ||||
| 819091914	2018-01-07 10:42:50	56237372	"Category:Ohmi Railway"	14	false	677153	"[[WP:AES|←]]Created page with ' [[Category:Railway companies of Japan]] [[Category:Rail transport in Shiga Prefecture]] [[Category:Seibu Group]]'"	113		"je7aw21fedbwyqsyofpisdrynsu7olr"	false	"Underbar dk"	false	false			 | ||||
| 819091968	2018-01-07 10:43:32	56237375	"User talk:92.226.219.222"	3	false	882433	"[[WP:AES|←]]Created page with '{{3rr}}~~~~'"	199		"cpm4tkzcx4hc6irr9ukbi06ogud8dtq"	false	"TastyPoutine"	false	false			 | ||||
| 819094036	2018-01-07 11:10:24	56237375	"User talk:92.226.219.222"	3	false	7611264	"[[User:AnomieBOT/docs/TemplateSubster|Substing templates]]: {{3rr}}. See [[User:AnomieBOT/docs/TemplateSubster]] for info."	1840		"artmfz8b2gxhb3pp8a5p4ksplxqfkpg"	true	"AnomieBOT"	false	false			 | ||||
| 819112363	2018-01-07 14:33:36	56237375	"User talk:92.226.219.222"	3	false	702940	"Your IP address has been blocked from editing because it has been used to [[WP:EVADE|evade a previous block]]. ([[WP:TW|TW]])"	2949		"dn9wj0n8d8pdd5lqe56uw5xamupowr1"	false	"Only"	false	false			 | ||||
| 819092004	2018-01-07 10:44:01	56237376	"User:Dipayanacharya"	2	false	32794237	"Education"	28		"ofueugwatmmn7u73isw732neuza57gk"	false	"Dipayanacharya"	false	false			 | ||||
| 819092390	2018-01-07 10:49:08	56237376	"User:Dipayanacharya"	2	false	32794237	"School"	38		"dsz55xv96ec2uv6w9c1z7c52ipfovbw"	false	"Dipayanacharya"	false	false			 | ||||
| 819092066	2018-01-07 10:44:56	56237378	"BSCIC"	0	false	21516552	"[[WP:AES|←]]Redirected page to [[Bangladesh Small and Cottage Industries Corporation]]"	65		"9ma38hak0ef1ew4fpiutxpnzd8oz1wd"	false	"Vinegarymass911"	false	false			 | ||||
| 819092102	2018-01-07 10:45:21	56237379	"Category:Women government ministers of Yemen"	14	false	754619	"[[WP:AES|←]]Created page with '{{portal|Yemen|Politics}} {{Non-diffusing subcategory|Government ministers of Yemen}}  {{Underpopulated category}}  Category:Women government ministers by nati...'"	285		"4dvakoat58bzyf5hmtthxukt29hip6n"	false	"BrownHairedGirl"	false	false			 | ||||
| 819092135	2018-01-07 10:45:54	56237381	"Talk:List of Morning Glories Characters"	1	false	410898	"[[WP:AES|←]]Created page with '{{WikiProject Fictional characters|class=List|importance=low}} {{Comicsproj|class=List|importance=low}}'"	103		"2sjrxsc7os9k9pg4su2t4rk2j8nn0h7"	false	"PRehse"	false	false			 | ||||
| 819092138	2018-01-07 10:45:56	56237382	"User talk:106.207.126.114"	3	false	13286072	"Warning [[Special:Contributions/106.207.126.114|106.207.126.114]] - #1"	1330		"3y9t5wpk6ur5jhone75rhm4wjf01fgi"	false	"ClueBot NG"	false	false	"welcome to Wikipedia"		"Warning" | ||||
| 819092495	2018-01-07 10:50:22	56237382	"User talk:106.207.126.114"	3	false	31190506	"Caution: Unconstructive editing on [[List of Baahubali characters]]. ([[WP:TW|TW]])"	2355		"8wvn6vh3isyt0dorpe89lztrburgupe"	false	"HindWIKI"	false	false	"welcome to Wikipedia"		 | ||||
|  | ||||
| @ -1,27 +1,27 @@ | ||||
| anon	articleid	date_time	deleted	editor	editor_id	minor	namespace	revert	reverteds	revid	sha1	text_chars	title	wp_evade | ||||
| FALSE	56237363	2018-01-07 10:40:58	FALSE	"NinjaRobotPirate"	3742946	FALSE	3	FALSE		819091731	135nz8q6lfam6cojla7azb7k5alx3t3	1141	"User talk:86.139.142.254"	WP:EVADE | ||||
| FALSE	56237364	2018-01-07 10:41:10	FALSE	"Kavin kavitha"	32792125	FALSE	3	FALSE		819091755	0pwezjc6yopz0smc8al6ogc4fax5bwo	663	"User talk:Kavin kavitha"	None | ||||
| FALSE	56237365	2018-01-07 10:41:26	FALSE	"Amicable always"	32621254	FALSE	3	FALSE		819091788	sz3t2ap7z8bpkdvdvi195f3i35949bv	399	"User talk:Dr.vivek163"	None | ||||
| FALSE	56237366	2018-01-07 10:41:31	FALSE	"ClueBot NG"	13286072	FALSE	3	FALSE		819091796	r6s5j8j3iykenrhuhpnkpsmmd71vubf	1260	"User talk:Twistorl"	None | ||||
| FALSE	56237368	2018-01-07 10:41:51	FALSE	"Khruner"	8409334	FALSE	0	FALSE		819091825	tf5qz2yaswx61zrlm9ovxzuhl7r2dc4	2249	"Kom Firin"	None | ||||
| FALSE	56237368	2018-01-27 12:16:02	FALSE	"Khruner"	8409334	TRUE	0	FALSE		822610647	e6oa4g0qv64icdaq26uu1zzbyr5hcbh	2230	"Kom Firin"	None | ||||
| FALSE	56237369	2018-01-07 10:42:05	FALSE	"Editingaccount1994"	32794215	FALSE	2	FALSE		819091844	0fyvyh2a8xu41gt8obr34oba0bfixj6	27840	"User:Editingaccount1994/sandbox"	None | ||||
| FALSE	56237369	2018-01-07 11:09:52	FALSE	"AnomieBOT"	7611264	TRUE	2	FALSE		819093984	8gy52aolt5rg3eaketwj5v7eiw0apv2	27787	"User:Editingaccount1994/sandbox"	None | ||||
| FALSE	56237369	2018-01-12 21:45:50	FALSE	"SporkBot"	12406635	TRUE	2	FALSE		820064189	he8ydemaanxlrpftqxkez8jfpge1fsj	27784	"User:Editingaccount1994/sandbox"	None | ||||
| FALSE	56237369	2018-01-12 23:28:11	FALSE	"SporkBot"	12406635	TRUE	2	FALSE		820078679	0to17w9rth3url8n7gvucdtobybdq5h	27783	"User:Editingaccount1994/sandbox"	None | ||||
| FALSE	56237369	2018-01-12 23:28:39	FALSE	"SporkBot"	12406635	TRUE	2	FALSE		820078733	531dizmmloyxffbkdr5vph7owh921eg	27782	"User:Editingaccount1994/sandbox"	None | ||||
| FALSE	56237369	2018-01-13 13:45:33	FALSE	"Frietjes"	13791031	FALSE	2	FALSE		820177382	nik9p2u2fuk4yazjxt8ymbicxv5qid9	27757	"User:Editingaccount1994/sandbox"	None | ||||
| FALSE	56237369	2018-01-24 01:35:22	FALSE	"CommonsDelinker"	2304267	FALSE	2	FALSE		822038928	gwk6pampl8si1v5pv3kwgteg710sfw3	27667	"User:Editingaccount1994/sandbox"	None | ||||
| FALSE	56237370	2018-01-07 10:42:20	FALSE	"PamD"	1368779	FALSE	0	FALSE		819091874	n4ozbsgle13p9yywtfrz982ccj8woc9	25	"Anita del Rey"	None | ||||
| FALSE	56237371	2018-01-07 10:42:27	FALSE	"ClueBot NG"	13286072	FALSE	3	FALSE		819091883	ksohnvsbeuzwpl5vb8a3v8m18hva0a7	1274	"User talk:119.94.96.157"	None | ||||
| FALSE	56237372	2018-01-07 10:42:50	FALSE	"Underbar dk"	677153	FALSE	14	FALSE		819091914	je7aw21fedbwyqsyofpisdrynsu7olr	113	"Category:Ohmi Railway"	None | ||||
| FALSE	56237375	2018-01-07 10:43:32	FALSE	"TastyPoutine"	882433	FALSE	3	FALSE		819091968	cpm4tkzcx4hc6irr9ukbi06ogud8dtq	199	"User talk:92.226.219.222"	None | ||||
| FALSE	56237375	2018-01-07 11:10:24	FALSE	"AnomieBOT"	7611264	TRUE	3	FALSE		819094036	artmfz8b2gxhb3pp8a5p4ksplxqfkpg	1840	"User talk:92.226.219.222"	None | ||||
| FALSE	56237375	2018-01-07 14:33:36	FALSE	"Only"	702940	FALSE	3	FALSE		819112363	dn9wj0n8d8pdd5lqe56uw5xamupowr1	2949	"User talk:92.226.219.222"	WP:EVADE | ||||
| FALSE	56237376	2018-01-07 10:44:01	FALSE	"Dipayanacharya"	32794237	FALSE	2	FALSE		819092004	ofueugwatmmn7u73isw732neuza57gk	28	"User:Dipayanacharya"	None | ||||
| FALSE	56237376	2018-01-07 10:49:08	FALSE	"Dipayanacharya"	32794237	FALSE	2	FALSE		819092390	dsz55xv96ec2uv6w9c1z7c52ipfovbw	38	"User:Dipayanacharya"	None | ||||
| FALSE	56237378	2018-01-07 10:44:56	FALSE	"Vinegarymass911"	21516552	FALSE	0	FALSE		819092066	9ma38hak0ef1ew4fpiutxpnzd8oz1wd	65	"BSCIC"	None | ||||
| FALSE	56237379	2018-01-07 10:45:21	FALSE	"BrownHairedGirl"	754619	FALSE	14	FALSE		819092102	4dvakoat58bzyf5hmtthxukt29hip6n	285	"Category:Women government ministers of Yemen"	None | ||||
| FALSE	56237381	2018-01-07 10:45:54	FALSE	"PRehse"	410898	FALSE	1	FALSE		819092135	2sjrxsc7os9k9pg4su2t4rk2j8nn0h7	103	"Talk:List of Morning Glories Characters"	None | ||||
| FALSE	56237382	2018-01-07 10:45:56	FALSE	"ClueBot NG"	13286072	FALSE	3	FALSE		819092138	3y9t5wpk6ur5jhone75rhm4wjf01fgi	1330	"User talk:106.207.126.114"	None | ||||
| FALSE	56237382	2018-01-07 10:50:22	FALSE	"HindWIKI"	31190506	FALSE	3	FALSE		819092495	8wvn6vh3isyt0dorpe89lztrburgupe	2355	"User talk:106.207.126.114"	None | ||||
| "revid"	"date_time"	"articleid"	"title"	"namespace"	"deleted"	"editorid"	"edit_summary"	"text_chars"	"reverteds"	"sha1"	"minor"	"editor"	"anon"	"revert"	"wp_evade" | ||||
| 819091731	2018-01-07 10:40:58	56237363	"User talk:86.139.142.254"	3	false	3742946	"Your IP address has been blocked from editing because it has been used to [[WP:EVADE|evade a previous block]]. ([[WP:TW|TW]])"	1141		"135nz8q6lfam6cojla7azb7k5alx3t3"	false	"NinjaRobotPirate"	false	false	"WP:EVADE" | ||||
| 819091755	2018-01-07 10:41:10	56237364	"User talk:Kavin kavitha"	3	false	32792125	"[[WP:AES|←]]Created page with ''''''Kavin (Tamil. கவின்) is a masculine given name, which is Tamil for ""beauty"", ""grace"", ""fairness"" or ""comeliness""Kavin is born on 01 /12/2001 at Sa...'"	663		"0pwezjc6yopz0smc8al6ogc4fax5bwo"	false	"Kavin kavitha"	false	false	 | ||||
| 819091788	2018-01-07 10:41:26	56237365	"User talk:Dr.vivek163"	3	false	32621254	"/* Regarding Merger discussion */ new section"	399		"sz3t2ap7z8bpkdvdvi195f3i35949bv"	false	"Amicable always"	false	false	 | ||||
| 819091796	2018-01-07 10:41:31	56237366	"User talk:Twistorl"	3	false	13286072	"Warning [[Special:Contributions/Twistorl|Twistorl]] - #1"	1260		"r6s5j8j3iykenrhuhpnkpsmmd71vubf"	false	"ClueBot NG"	false	false	 | ||||
| 819091825	2018-01-07 10:41:51	56237368	"Kom Firin"	0	false	8409334	"[[WP:AES|←]]Created page with '[[File:Stele 67.119 Brooklyn.jpg|thumb|Stele of the [[Libu#Great Chiefs of the Libu|Chief of the Libu]] Titaru, a contemporary of pharaoh [[Shoshenq V]] of the [...'TestCaseB and you're a Tor node "	2249		"tf5qz2yaswx61zrlm9ovxzuhl7r2dc4"	false	"Khruner"	false	false	 | ||||
| 822610647	2018-01-27 12:16:02	56237368	"Kom Firin"	0	false	8409334	"/* History */ typo"	2230		"e6oa4g0qv64icdaq26uu1zzbyr5hcbh"	true	"Khruner"	false	false	 | ||||
| 819091844	2018-01-07 10:42:05	56237369	"User:Editingaccount1994/sandbox"	2	false	32794215	"[[WP:AES|←]]Created page with '{{User sandbox}} <!-- EDIT BELOW THIS LINE --> {{voir homonymes|Chevalier}} {{Infobox Artiste  | nom                = Li Chevalier  | autres noms        =   | im...'"	27840		"0fyvyh2a8xu41gt8obr34oba0bfixj6"	false	"Editingaccount1994"	false	false	 | ||||
| 819093984	2018-01-07 11:09:52	56237369	"User:Editingaccount1994/sandbox"	2	false	7611264	"[[User:AnomieBOT/docs/TemplateSubster|Substing templates]]: {{Lien web}}. See [[User:AnomieBOT/docs/TemplateSubster]] for info."	27787		"8gy52aolt5rg3eaketwj5v7eiw0apv2"	true	"AnomieBOT"	false	false	 | ||||
| 820064189	2018-01-12 21:45:50	56237369	"User:Editingaccount1994/sandbox"	2	false	12406635	"Orphan per [[WP:TFD|TFD outcome]]"	27784		"he8ydemaanxlrpftqxkez8jfpge1fsj"	true	"SporkBot"	false	false	 | ||||
| 820078679	2018-01-12 23:28:11	56237369	"User:Editingaccount1994/sandbox"	2	false	12406635	"Replace template per [[Wikipedia:Templates for discussion/Log/2010 June 13|TFD outcome]]; no change in content"	27783		"0to17w9rth3url8n7gvucdtobybdq5h"	true	"SporkBot"	false	false	 | ||||
| 820078733	2018-01-12 23:28:39	56237369	"User:Editingaccount1994/sandbox"	2	false	12406635	"Replace template per [[Wikipedia:Templates for discussion/Log/2011 February 17|TFD outcome]]; no change in content"	27782		"531dizmmloyxffbkdr5vph7owh921eg"	true	"SporkBot"	false	false	 | ||||
| 820177382	2018-01-13 13:45:33	56237369	"User:Editingaccount1994/sandbox"	2	false	13791031	"translate TestCaseD if you are from tor you need neutral point of view "	27757		"nik9p2u2fuk4yazjxt8ymbicxv5qid9"	false	"Frietjes"	false	false	 | ||||
| 822038928	2018-01-24 01:35:22	56237369	"User:Editingaccount1994/sandbox"	2	false	2304267	"Removing [[:c:File:Li_Chevalier_Art_Studio.jpg|Li_Chevalier_Art_Studio.jpg]], it has been deleted from Commons by [[:c:User:JuTa|JuTa]] because: [[:c:COM:OTRS|No permission]] since 16 January 2018."	27667		"gwk6pampl8si1v5pv3kwgteg710sfw3"	false	"CommonsDelinker"	false	false	 | ||||
| 819091874	2018-01-07 10:42:20	56237370	"Anita del Rey"	0	false	1368779	"r from alt name"	25		"n4ozbsgle13p9yywtfrz982ccj8woc9"	false	"PamD"	false	false	 | ||||
| 819091883	2018-01-07 10:42:27	56237371	"User talk:119.94.96.157"	3	false	13286072	"Warning [[Special:Contributions/119.94.96.157|119.94.96.157]] - #1"	1274		"ksohnvsbeuzwpl5vb8a3v8m18hva0a7"	false	"ClueBot NG"	false	false	 | ||||
| 819091914	2018-01-07 10:42:50	56237372	"Category:Ohmi Railway"	14	false	677153	"[[WP:AES|←]]Created page with ' [[Category:Railway companies of Japan]] [[Category:Rail transport in Shiga Prefecture]] [[Category:Seibu Group]]'"	113		"je7aw21fedbwyqsyofpisdrynsu7olr"	false	"Underbar dk"	false	false	 | ||||
| 819091968	2018-01-07 10:43:32	56237375	"User talk:92.226.219.222"	3	false	882433	"[[WP:AES|←]]Created page with '{{3rr}}~~~~'"	199		"cpm4tkzcx4hc6irr9ukbi06ogud8dtq"	false	"TastyPoutine"	false	false	 | ||||
| 819094036	2018-01-07 11:10:24	56237375	"User talk:92.226.219.222"	3	false	7611264	"[[User:AnomieBOT/docs/TemplateSubster|Substing templates]]: {{3rr}}. See [[User:AnomieBOT/docs/TemplateSubster]] for info."	1840		"artmfz8b2gxhb3pp8a5p4ksplxqfkpg"	true	"AnomieBOT"	false	false	 | ||||
| 819112363	2018-01-07 14:33:36	56237375	"User talk:92.226.219.222"	3	false	702940	"Your IP address has been blocked from editing because it has been used to [[WP:EVADE|evade a previous block]]. ([[WP:TW|TW]])"	2949		"dn9wj0n8d8pdd5lqe56uw5xamupowr1"	false	"Only"	false	false	"WP:EVADE" | ||||
| 819092004	2018-01-07 10:44:01	56237376	"User:Dipayanacharya"	2	false	32794237	"Education"	28		"ofueugwatmmn7u73isw732neuza57gk"	false	"Dipayanacharya"	false	false	 | ||||
| 819092390	2018-01-07 10:49:08	56237376	"User:Dipayanacharya"	2	false	32794237	"School"	38		"dsz55xv96ec2uv6w9c1z7c52ipfovbw"	false	"Dipayanacharya"	false	false	 | ||||
| 819092066	2018-01-07 10:44:56	56237378	"BSCIC"	0	false	21516552	"[[WP:AES|←]]Redirected page to [[Bangladesh Small and Cottage Industries Corporation]]"	65		"9ma38hak0ef1ew4fpiutxpnzd8oz1wd"	false	"Vinegarymass911"	false	false	 | ||||
| 819092102	2018-01-07 10:45:21	56237379	"Category:Women government ministers of Yemen"	14	false	754619	"[[WP:AES|←]]Created page with '{{portal|Yemen|Politics}} {{Non-diffusing subcategory|Government ministers of Yemen}}  {{Underpopulated category}}  Category:Women government ministers by nati...'"	285		"4dvakoat58bzyf5hmtthxukt29hip6n"	false	"BrownHairedGirl"	false	false	 | ||||
| 819092135	2018-01-07 10:45:54	56237381	"Talk:List of Morning Glories Characters"	1	false	410898	"[[WP:AES|←]]Created page with '{{WikiProject Fictional characters|class=List|importance=low}} {{Comicsproj|class=List|importance=low}}'"	103		"2sjrxsc7os9k9pg4su2t4rk2j8nn0h7"	false	"PRehse"	false	false	 | ||||
| 819092138	2018-01-07 10:45:56	56237382	"User talk:106.207.126.114"	3	false	13286072	"Warning [[Special:Contributions/106.207.126.114|106.207.126.114]] - #1"	1330		"3y9t5wpk6ur5jhone75rhm4wjf01fgi"	false	"ClueBot NG"	false	false	 | ||||
| 819092495	2018-01-07 10:50:22	56237382	"User talk:106.207.126.114"	3	false	31190506	"Caution: Unconstructive editing on [[List of Baahubali characters]]. ([[WP:TW|TW]])"	2355		"8wvn6vh3isyt0dorpe89lztrburgupe"	false	"HindWIKI"	false	false	 | ||||
|  | ||||
| @ -1,27 +1,27 @@ | ||||
| anon	articleid	date_time	deleted	editor	editor_id	li_cheval	minor	namespace	revert	reverteds	revid	sha1	text_chars	three_cat	three_letter	three_number	title | ||||
| FALSE	56237363	2018-01-07 10:40:58	FALSE	"NinjaRobotPirate"	3742946	None	FALSE	3	FALSE		819091731	135nz8q6lfam6cojla7azb7k5alx3t3	1141	None	has, has	None	"User talk:86.139.142.254" | ||||
| FALSE	56237364	2018-01-07 10:41:10	FALSE	"Kavin kavitha"	32792125	None	FALSE	3	FALSE		819091755	0pwezjc6yopz0smc8al6ogc4fax5bwo	663	None	AES, for	01, 12, 2001	"User talk:Kavin kavitha" | ||||
| FALSE	56237365	2018-01-07 10:41:26	FALSE	"Amicable always"	32621254	None	FALSE	3	FALSE		819091788	sz3t2ap7z8bpkdvdvi195f3i35949bv	399	None	new	None	"User talk:Dr.vivek163" | ||||
| FALSE	56237366	2018-01-07 10:41:31	FALSE	"ClueBot NG"	13286072	None	FALSE	3	FALSE		819091796	r6s5j8j3iykenrhuhpnkpsmmd71vubf	1260	None	None	1	"User talk:Twistorl" | ||||
| FALSE	56237368	2018-01-07 10:41:51	FALSE	"Khruner"	8409334	None	FALSE	0	FALSE		819091825	tf5qz2yaswx61zrlm9ovxzuhl7r2dc4	2249	None	AES, jpg, the, the, the, the, and, you, Tor	67, 119	"Kom Firin" | ||||
| FALSE	56237368	2018-01-27 12:16:02	FALSE	"Khruner"	8409334	None	TRUE	0	FALSE		822610647	e6oa4g0qv64icdaq26uu1zzbyr5hcbh	2230	None	None	None	"Kom Firin" | ||||
| FALSE	56237369	2018-01-07 10:42:05	FALSE	"Editingaccount1994"	32794215	Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier	FALSE	2	FALSE		819091844	0fyvyh2a8xu41gt8obr34oba0bfixj6	27840	None	AES, nom	None	"User:Editingaccount1994/sandbox" | ||||
| FALSE	56237369	2018-01-07 11:09:52	FALSE	"AnomieBOT"	7611264	Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier	TRUE	2	FALSE		819093984	8gy52aolt5rg3eaketwj5v7eiw0apv2	27787	None	web, See, for	None	"User:Editingaccount1994/sandbox" | ||||
| FALSE	56237369	2018-01-12 21:45:50	FALSE	"SporkBot"	12406635	Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier	TRUE	2	FALSE		820064189	he8ydemaanxlrpftqxkez8jfpge1fsj	27784	None	per, TFD, TFD	None	"User:Editingaccount1994/sandbox" | ||||
| FALSE	56237369	2018-01-12 23:28:11	FALSE	"SporkBot"	12406635	Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier	TRUE	2	FALSE		820078679	0to17w9rth3url8n7gvucdtobybdq5h	27783	None	per, for, Log, TFD	2010, 13	"User:Editingaccount1994/sandbox" | ||||
| FALSE	56237369	2018-01-12 23:28:39	FALSE	"SporkBot"	12406635	Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier	TRUE	2	FALSE		820078733	531dizmmloyxffbkdr5vph7owh921eg	27782	None	per, for, Log, TFD	2011, 17	"User:Editingaccount1994/sandbox" | ||||
| FALSE	56237369	2018-01-13 13:45:33	FALSE	"Frietjes"	13791031	Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier	FALSE	2	FALSE		820177382	nik9p2u2fuk4yazjxt8ymbicxv5qid9	27757	None	you, are, tor, you	None	"User:Editingaccount1994/sandbox" | ||||
| FALSE	56237369	2018-01-24 01:35:22	FALSE	"CommonsDelinker"	2304267	Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier	FALSE	2	FALSE		822038928	gwk6pampl8si1v5pv3kwgteg710sfw3	27667	None	jpg, jpg, has, COM	16, 2018	"User:Editingaccount1994/sandbox" | ||||
| FALSE	56237370	2018-01-07 10:42:20	FALSE	"PamD"	1368779	None	FALSE	0	FALSE		819091874	n4ozbsgle13p9yywtfrz982ccj8woc9	25	None	alt	None	"Anita del Rey" | ||||
| FALSE	56237371	2018-01-07 10:42:27	FALSE	"ClueBot NG"	13286072	None	FALSE	3	FALSE		819091883	ksohnvsbeuzwpl5vb8a3v8m18hva0a7	1274	None	None	119, 94, 96, 157, 119, 94, 96, 157, 1	"User talk:119.94.96.157" | ||||
| FALSE	56237372	2018-01-07 10:42:50	FALSE	"Underbar dk"	677153	None	FALSE	14	FALSE		819091914	je7aw21fedbwyqsyofpisdrynsu7olr	113	None	AES	None	"Category:Ohmi Railway" | ||||
| FALSE	56237375	2018-01-07 10:43:32	FALSE	"TastyPoutine"	882433	None	FALSE	3	FALSE		819091968	cpm4tkzcx4hc6irr9ukbi06ogud8dtq	199	None	AES	None	"User talk:92.226.219.222" | ||||
| FALSE	56237375	2018-01-07 11:10:24	FALSE	"AnomieBOT"	7611264	None	TRUE	3	FALSE		819094036	artmfz8b2gxhb3pp8a5p4ksplxqfkpg	1840	None	See, for	None	"User talk:92.226.219.222" | ||||
| FALSE	56237375	2018-01-07 14:33:36	FALSE	"Only"	702940	None	FALSE	3	FALSE		819112363	dn9wj0n8d8pdd5lqe56uw5xamupowr1	2949	None	has, has	None	"User talk:92.226.219.222" | ||||
| FALSE	56237376	2018-01-07 10:44:01	FALSE	"Dipayanacharya"	32794237	None	FALSE	2	FALSE		819092004	ofueugwatmmn7u73isw732neuza57gk	28	None	None	None	"User:Dipayanacharya" | ||||
| FALSE	56237376	2018-01-07 10:49:08	FALSE	"Dipayanacharya"	32794237	None	FALSE	2	FALSE		819092390	dsz55xv96ec2uv6w9c1z7c52ipfovbw	38	None	None	None	"User:Dipayanacharya" | ||||
| FALSE	56237378	2018-01-07 10:44:56	FALSE	"Vinegarymass911"	21516552	None	FALSE	0	FALSE		819092066	9ma38hak0ef1ew4fpiutxpnzd8oz1wd	65	None	AES, and	None	"BSCIC" | ||||
| FALSE	56237379	2018-01-07 10:45:21	FALSE	"BrownHairedGirl"	754619	None	FALSE	14	FALSE		819092102	4dvakoat58bzyf5hmtthxukt29hip6n	285	None	AES, Non	None	"Category:Women government ministers of Yemen" | ||||
| FALSE	56237381	2018-01-07 10:45:54	FALSE	"PRehse"	410898	None	FALSE	1	FALSE		819092135	2sjrxsc7os9k9pg4su2t4rk2j8nn0h7	103	None	AES, low, low	None	"Talk:List of Morning Glories Characters" | ||||
| FALSE	56237382	2018-01-07 10:45:56	FALSE	"ClueBot NG"	13286072	None	FALSE	3	FALSE		819092138	3y9t5wpk6ur5jhone75rhm4wjf01fgi	1330	None	None	106, 207, 126, 114, 106, 207, 126, 114, 1	"User talk:106.207.126.114" | ||||
| FALSE	56237382	2018-01-07 10:50:22	FALSE	"HindWIKI"	31190506	None	FALSE	3	FALSE		819092495	8wvn6vh3isyt0dorpe89lztrburgupe	2355	None	None	None	"User talk:106.207.126.114" | ||||
| "revid"	"date_time"	"articleid"	"title"	"namespace"	"deleted"	"editorid"	"edit_summary"	"text_chars"	"reverteds"	"sha1"	"minor"	"editor"	"anon"	"revert"	"li_cheval"	"three_letter"	"three_number"	"three_cat" | ||||
| 819091731	2018-01-07 10:40:58	56237363	"User talk:86.139.142.254"	3	false	3742946	"Your IP address has been blocked from editing because it has been used to [[WP:EVADE|evade a previous block]]. ([[WP:TW|TW]])"	1141		"135nz8q6lfam6cojla7azb7k5alx3t3"	false	"NinjaRobotPirate"	false	false		"has, has"		 | ||||
| 819091755	2018-01-07 10:41:10	56237364	"User talk:Kavin kavitha"	3	false	32792125	"[[WP:AES|←]]Created page with ''''''Kavin (Tamil. கவின்) is a masculine given name, which is Tamil for ""beauty"", ""grace"", ""fairness"" or ""comeliness""Kavin is born on 01 /12/2001 at Sa...'"	663		"0pwezjc6yopz0smc8al6ogc4fax5bwo"	false	"Kavin kavitha"	false	false		"AES, for"	"01, 12, 2001"	 | ||||
| 819091788	2018-01-07 10:41:26	56237365	"User talk:Dr.vivek163"	3	false	32621254	"/* Regarding Merger discussion */ new section"	399		"sz3t2ap7z8bpkdvdvi195f3i35949bv"	false	"Amicable always"	false	false		"new"		 | ||||
| 819091796	2018-01-07 10:41:31	56237366	"User talk:Twistorl"	3	false	13286072	"Warning [[Special:Contributions/Twistorl|Twistorl]] - #1"	1260		"r6s5j8j3iykenrhuhpnkpsmmd71vubf"	false	"ClueBot NG"	false	false			"1"	 | ||||
| 819091825	2018-01-07 10:41:51	56237368	"Kom Firin"	0	false	8409334	"[[WP:AES|←]]Created page with '[[File:Stele 67.119 Brooklyn.jpg|thumb|Stele of the [[Libu#Great Chiefs of the Libu|Chief of the Libu]] Titaru, a contemporary of pharaoh [[Shoshenq V]] of the [...'TestCaseB and you're a Tor node "	2249		"tf5qz2yaswx61zrlm9ovxzuhl7r2dc4"	false	"Khruner"	false	false		"AES, jpg, the, the, the, the, and, you, Tor"	"67, 119"	 | ||||
| 822610647	2018-01-27 12:16:02	56237368	"Kom Firin"	0	false	8409334	"/* History */ typo"	2230		"e6oa4g0qv64icdaq26uu1zzbyr5hcbh"	true	"Khruner"	false	false				 | ||||
| 819091844	2018-01-07 10:42:05	56237369	"User:Editingaccount1994/sandbox"	2	false	32794215	"[[WP:AES|←]]Created page with '{{User sandbox}} <!-- EDIT BELOW THIS LINE --> {{voir homonymes|Chevalier}} {{Infobox Artiste  | nom                = Li Chevalier  | autres noms        =   | im...'"	27840		"0fyvyh2a8xu41gt8obr34oba0bfixj6"	false	"Editingaccount1994"	false	false	"Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier"	"AES, nom"		 | ||||
| 819093984	2018-01-07 11:09:52	56237369	"User:Editingaccount1994/sandbox"	2	false	7611264	"[[User:AnomieBOT/docs/TemplateSubster|Substing templates]]: {{Lien web}}. See [[User:AnomieBOT/docs/TemplateSubster]] for info."	27787		"8gy52aolt5rg3eaketwj5v7eiw0apv2"	true	"AnomieBOT"	false	false	"Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier"	"web, See, for"		 | ||||
| 820064189	2018-01-12 21:45:50	56237369	"User:Editingaccount1994/sandbox"	2	false	12406635	"Orphan per [[WP:TFD|TFD outcome]]"	27784		"he8ydemaanxlrpftqxkez8jfpge1fsj"	true	"SporkBot"	false	false	"Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier"	"per, TFD, TFD"		 | ||||
| 820078679	2018-01-12 23:28:11	56237369	"User:Editingaccount1994/sandbox"	2	false	12406635	"Replace template per [[Wikipedia:Templates for discussion/Log/2010 June 13|TFD outcome]]; no change in content"	27783		"0to17w9rth3url8n7gvucdtobybdq5h"	true	"SporkBot"	false	false	"Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier"	"per, for, Log, TFD"	"2010, 13"	 | ||||
| 820078733	2018-01-12 23:28:39	56237369	"User:Editingaccount1994/sandbox"	2	false	12406635	"Replace template per [[Wikipedia:Templates for discussion/Log/2011 February 17|TFD outcome]]; no change in content"	27782		"531dizmmloyxffbkdr5vph7owh921eg"	true	"SporkBot"	false	false	"Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier"	"per, for, Log, TFD"	"2011, 17"	 | ||||
| 820177382	2018-01-13 13:45:33	56237369	"User:Editingaccount1994/sandbox"	2	false	13791031	"translate TestCaseD if you are from tor you need neutral point of view "	27757		"nik9p2u2fuk4yazjxt8ymbicxv5qid9"	false	"Frietjes"	false	false	"Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier"	"you, are, tor, you"		 | ||||
| 822038928	2018-01-24 01:35:22	56237369	"User:Editingaccount1994/sandbox"	2	false	2304267	"Removing [[:c:File:Li_Chevalier_Art_Studio.jpg|Li_Chevalier_Art_Studio.jpg]], it has been deleted from Commons by [[:c:User:JuTa|JuTa]] because: [[:c:COM:OTRS|No permission]] since 16 January 2018."	27667		"gwk6pampl8si1v5pv3kwgteg710sfw3"	false	"CommonsDelinker"	false	false	"Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier, Li Chevalier"	"jpg, jpg, has, COM"	"16, 2018"	 | ||||
| 819091874	2018-01-07 10:42:20	56237370	"Anita del Rey"	0	false	1368779	"r from alt name"	25		"n4ozbsgle13p9yywtfrz982ccj8woc9"	false	"PamD"	false	false		"alt"		 | ||||
| 819091883	2018-01-07 10:42:27	56237371	"User talk:119.94.96.157"	3	false	13286072	"Warning [[Special:Contributions/119.94.96.157|119.94.96.157]] - #1"	1274		"ksohnvsbeuzwpl5vb8a3v8m18hva0a7"	false	"ClueBot NG"	false	false			"119, 94, 96, 157, 119, 94, 96, 157, 1"	 | ||||
| 819091914	2018-01-07 10:42:50	56237372	"Category:Ohmi Railway"	14	false	677153	"[[WP:AES|←]]Created page with ' [[Category:Railway companies of Japan]] [[Category:Rail transport in Shiga Prefecture]] [[Category:Seibu Group]]'"	113		"je7aw21fedbwyqsyofpisdrynsu7olr"	false	"Underbar dk"	false	false		"AES"		 | ||||
| 819091968	2018-01-07 10:43:32	56237375	"User talk:92.226.219.222"	3	false	882433	"[[WP:AES|←]]Created page with '{{3rr}}~~~~'"	199		"cpm4tkzcx4hc6irr9ukbi06ogud8dtq"	false	"TastyPoutine"	false	false		"AES"		 | ||||
| 819094036	2018-01-07 11:10:24	56237375	"User talk:92.226.219.222"	3	false	7611264	"[[User:AnomieBOT/docs/TemplateSubster|Substing templates]]: {{3rr}}. See [[User:AnomieBOT/docs/TemplateSubster]] for info."	1840		"artmfz8b2gxhb3pp8a5p4ksplxqfkpg"	true	"AnomieBOT"	false	false		"See, for"		 | ||||
| 819112363	2018-01-07 14:33:36	56237375	"User talk:92.226.219.222"	3	false	702940	"Your IP address has been blocked from editing because it has been used to [[WP:EVADE|evade a previous block]]. ([[WP:TW|TW]])"	2949		"dn9wj0n8d8pdd5lqe56uw5xamupowr1"	false	"Only"	false	false		"has, has"		 | ||||
| 819092004	2018-01-07 10:44:01	56237376	"User:Dipayanacharya"	2	false	32794237	"Education"	28		"ofueugwatmmn7u73isw732neuza57gk"	false	"Dipayanacharya"	false	false				 | ||||
| 819092390	2018-01-07 10:49:08	56237376	"User:Dipayanacharya"	2	false	32794237	"School"	38		"dsz55xv96ec2uv6w9c1z7c52ipfovbw"	false	"Dipayanacharya"	false	false				 | ||||
| 819092066	2018-01-07 10:44:56	56237378	"BSCIC"	0	false	21516552	"[[WP:AES|←]]Redirected page to [[Bangladesh Small and Cottage Industries Corporation]]"	65		"9ma38hak0ef1ew4fpiutxpnzd8oz1wd"	false	"Vinegarymass911"	false	false		"AES, and"		 | ||||
| 819092102	2018-01-07 10:45:21	56237379	"Category:Women government ministers of Yemen"	14	false	754619	"[[WP:AES|←]]Created page with '{{portal|Yemen|Politics}} {{Non-diffusing subcategory|Government ministers of Yemen}}  {{Underpopulated category}}  Category:Women government ministers by nati...'"	285		"4dvakoat58bzyf5hmtthxukt29hip6n"	false	"BrownHairedGirl"	false	false		"AES, Non"		 | ||||
| 819092135	2018-01-07 10:45:54	56237381	"Talk:List of Morning Glories Characters"	1	false	410898	"[[WP:AES|←]]Created page with '{{WikiProject Fictional characters|class=List|importance=low}} {{Comicsproj|class=List|importance=low}}'"	103		"2sjrxsc7os9k9pg4su2t4rk2j8nn0h7"	false	"PRehse"	false	false		"AES, low, low"		 | ||||
| 819092138	2018-01-07 10:45:56	56237382	"User talk:106.207.126.114"	3	false	13286072	"Warning [[Special:Contributions/106.207.126.114|106.207.126.114]] - #1"	1330		"3y9t5wpk6ur5jhone75rhm4wjf01fgi"	false	"ClueBot NG"	false	false			"106, 207, 126, 114, 106, 207, 126, 114, 1"	 | ||||
| 819092495	2018-01-07 10:50:22	56237382	"User talk:106.207.126.114"	3	false	31190506	"Caution: Unconstructive editing on [[List of Baahubali characters]]. ([[WP:TW|TW]])"	2355		"8wvn6vh3isyt0dorpe89lztrburgupe"	false	"HindWIKI"	false	false				 | ||||
|  | ||||
| 
 | 
| @ -1,27 +1,27 @@ | ||||
| anon	articleid	date_time	deleted	editor	editor_id	minor	namespace	npov_neutral	npov_npov	revert	reverteds	revid	sha1	testcase_a	testcase_b	testcase_c	testcase_d	text_chars	title | ||||
| FALSE	56237363	2018-01-07 10:40:58	FALSE	"NinjaRobotPirate"	3742946	FALSE	3	None	None	FALSE		819091731	135nz8q6lfam6cojla7azb7k5alx3t3	None	None	None	None	1141	"User talk:86.139.142.254" | ||||
| FALSE	56237364	2018-01-07 10:41:10	FALSE	"Kavin kavitha"	32792125	FALSE	3	None	None	FALSE		819091755	0pwezjc6yopz0smc8al6ogc4fax5bwo	None	None	None	None	663	"User talk:Kavin kavitha" | ||||
| FALSE	56237365	2018-01-07 10:41:26	FALSE	"Amicable always"	32621254	FALSE	3	None	NPOV, NPOV	FALSE		819091788	sz3t2ap7z8bpkdvdvi195f3i35949bv	None	None	None	None	399	"User talk:Dr.vivek163" | ||||
| FALSE	56237366	2018-01-07 10:41:31	FALSE	"ClueBot NG"	13286072	FALSE	3	None	None	FALSE		819091796	r6s5j8j3iykenrhuhpnkpsmmd71vubf	None	None	None	None	1260	"User talk:Twistorl" | ||||
| FALSE	56237368	2018-01-07 10:41:51	FALSE	"Khruner"	8409334	FALSE	0	None	NPOV	FALSE		819091825	tf5qz2yaswx61zrlm9ovxzuhl7r2dc4	None	TestCaseB	None	None	2249	"Kom Firin" | ||||
| FALSE	56237368	2018-01-27 12:16:02	FALSE	"Khruner"	8409334	TRUE	0	None	None	FALSE		822610647	e6oa4g0qv64icdaq26uu1zzbyr5hcbh	None	None	None	None	2230	"Kom Firin" | ||||
| FALSE	56237369	2018-01-07 10:42:05	FALSE	"Editingaccount1994"	32794215	FALSE	2	None	None	FALSE		819091844	0fyvyh2a8xu41gt8obr34oba0bfixj6	None	None	None	None	27840	"User:Editingaccount1994/sandbox" | ||||
| FALSE	56237369	2018-01-07 11:09:52	FALSE	"AnomieBOT"	7611264	TRUE	2	None	None	FALSE		819093984	8gy52aolt5rg3eaketwj5v7eiw0apv2	None	None	None	None	27787	"User:Editingaccount1994/sandbox" | ||||
| FALSE	56237369	2018-01-12 21:45:50	FALSE	"SporkBot"	12406635	TRUE	2	None	None	FALSE		820064189	he8ydemaanxlrpftqxkez8jfpge1fsj	None	None	None	None	27784	"User:Editingaccount1994/sandbox" | ||||
| FALSE	56237369	2018-01-12 23:28:11	FALSE	"SporkBot"	12406635	TRUE	2	None	None	FALSE		820078679	0to17w9rth3url8n7gvucdtobybdq5h	None	None	None	None	27783	"User:Editingaccount1994/sandbox" | ||||
| FALSE	56237369	2018-01-12 23:28:39	FALSE	"SporkBot"	12406635	TRUE	2	None	None	FALSE		820078733	531dizmmloyxffbkdr5vph7owh921eg	None	None	None	None	27782	"User:Editingaccount1994/sandbox" | ||||
| FALSE	56237369	2018-01-13 13:45:33	FALSE	"Frietjes"	13791031	FALSE	2	None	None	FALSE		820177382	nik9p2u2fuk4yazjxt8ymbicxv5qid9	None	None	None	TestCaseD	27757	"User:Editingaccount1994/sandbox" | ||||
| FALSE	56237369	2018-01-24 01:35:22	FALSE	"CommonsDelinker"	2304267	FALSE	2	None	None	FALSE		822038928	gwk6pampl8si1v5pv3kwgteg710sfw3	None	None	None	None	27667	"User:Editingaccount1994/sandbox" | ||||
| FALSE	56237370	2018-01-07 10:42:20	FALSE	"PamD"	1368779	FALSE	0	None	None	FALSE		819091874	n4ozbsgle13p9yywtfrz982ccj8woc9	None	None	None	None	25	"Anita del Rey" | ||||
| FALSE	56237371	2018-01-07 10:42:27	FALSE	"ClueBot NG"	13286072	FALSE	3	None	None	FALSE		819091883	ksohnvsbeuzwpl5vb8a3v8m18hva0a7	None	None	None	None	1274	"User talk:119.94.96.157" | ||||
| FALSE	56237372	2018-01-07 10:42:50	FALSE	"Underbar dk"	677153	FALSE	14	None	None	FALSE		819091914	je7aw21fedbwyqsyofpisdrynsu7olr	None	None	None	None	113	"Category:Ohmi Railway" | ||||
| FALSE	56237375	2018-01-07 10:43:32	FALSE	"TastyPoutine"	882433	FALSE	3	None	None	FALSE		819091968	cpm4tkzcx4hc6irr9ukbi06ogud8dtq	None	None	None	None	199	"User talk:92.226.219.222" | ||||
| FALSE	56237375	2018-01-07 11:10:24	FALSE	"AnomieBOT"	7611264	TRUE	3	None	None	FALSE		819094036	artmfz8b2gxhb3pp8a5p4ksplxqfkpg	None	None	None	None	1840	"User talk:92.226.219.222" | ||||
| FALSE	56237375	2018-01-07 14:33:36	FALSE	"Only"	702940	FALSE	3	None	None	FALSE		819112363	dn9wj0n8d8pdd5lqe56uw5xamupowr1	None	None	None	None	2949	"User talk:92.226.219.222" | ||||
| FALSE	56237376	2018-01-07 10:44:01	FALSE	"Dipayanacharya"	32794237	FALSE	2	None	None	FALSE		819092004	ofueugwatmmn7u73isw732neuza57gk	None	None	None	None	28	"User:Dipayanacharya" | ||||
| FALSE	56237376	2018-01-07 10:49:08	FALSE	"Dipayanacharya"	32794237	FALSE	2	None	None	FALSE		819092390	dsz55xv96ec2uv6w9c1z7c52ipfovbw	None	None	None	None	38	"User:Dipayanacharya" | ||||
| FALSE	56237378	2018-01-07 10:44:56	FALSE	"Vinegarymass911"	21516552	FALSE	0	None	None	FALSE		819092066	9ma38hak0ef1ew4fpiutxpnzd8oz1wd	None	None	None	None	65	"BSCIC" | ||||
| FALSE	56237379	2018-01-07 10:45:21	FALSE	"BrownHairedGirl"	754619	FALSE	14	None	None	FALSE		819092102	4dvakoat58bzyf5hmtthxukt29hip6n	None	None	None	None	285	"Category:Women government ministers of Yemen" | ||||
| FALSE	56237381	2018-01-07 10:45:54	FALSE	"PRehse"	410898	FALSE	1	None	None	FALSE		819092135	2sjrxsc7os9k9pg4su2t4rk2j8nn0h7	None	None	None	None	103	"Talk:List of Morning Glories Characters" | ||||
| FALSE	56237382	2018-01-07 10:45:56	FALSE	"ClueBot NG"	13286072	FALSE	3	None	None	FALSE		819092138	3y9t5wpk6ur5jhone75rhm4wjf01fgi	None	None	None	None	1330	"User talk:106.207.126.114" | ||||
| FALSE	56237382	2018-01-07 10:50:22	FALSE	"HindWIKI"	31190506	FALSE	3	None	None	FALSE		819092495	8wvn6vh3isyt0dorpe89lztrburgupe	None	None	None	None	2355	"User talk:106.207.126.114" | ||||
| "revid"	"date_time"	"articleid"	"title"	"namespace"	"deleted"	"editorid"	"edit_summary"	"text_chars"	"reverteds"	"sha1"	"minor"	"editor"	"anon"	"revert"	"npov_npov"	"npov_neutral"	"testcase_a"	"testcase_b"	"testcase_c"	"testcase_d" | ||||
| 819091731	2018-01-07 10:40:58	56237363	"User talk:86.139.142.254"	3	false	3742946	"Your IP address has been blocked from editing because it has been used to [[WP:EVADE|evade a previous block]]. ([[WP:TW|TW]])"	1141		"135nz8q6lfam6cojla7azb7k5alx3t3"	false	"NinjaRobotPirate"	false	false						 | ||||
| 819091755	2018-01-07 10:41:10	56237364	"User talk:Kavin kavitha"	3	false	32792125	"[[WP:AES|←]]Created page with ''''''Kavin (Tamil. கவின்) is a masculine given name, which is Tamil for ""beauty"", ""grace"", ""fairness"" or ""comeliness""Kavin is born on 01 /12/2001 at Sa...'"	663		"0pwezjc6yopz0smc8al6ogc4fax5bwo"	false	"Kavin kavitha"	false	false						 | ||||
| 819091788	2018-01-07 10:41:26	56237365	"User talk:Dr.vivek163"	3	false	32621254	"/* Regarding Merger discussion */ new section"	399		"sz3t2ap7z8bpkdvdvi195f3i35949bv"	false	"Amicable always"	false	false	"NPOV, NPOV"					 | ||||
| 819091796	2018-01-07 10:41:31	56237366	"User talk:Twistorl"	3	false	13286072	"Warning [[Special:Contributions/Twistorl|Twistorl]] - #1"	1260		"r6s5j8j3iykenrhuhpnkpsmmd71vubf"	false	"ClueBot NG"	false	false						 | ||||
| 819091825	2018-01-07 10:41:51	56237368	"Kom Firin"	0	false	8409334	"[[WP:AES|←]]Created page with '[[File:Stele 67.119 Brooklyn.jpg|thumb|Stele of the [[Libu#Great Chiefs of the Libu|Chief of the Libu]] Titaru, a contemporary of pharaoh [[Shoshenq V]] of the [...'TestCaseB and you're a Tor node "	2249		"tf5qz2yaswx61zrlm9ovxzuhl7r2dc4"	false	"Khruner"	false	false	"NPOV"			"TestCaseB"		 | ||||
| 822610647	2018-01-27 12:16:02	56237368	"Kom Firin"	0	false	8409334	"/* History */ typo"	2230		"e6oa4g0qv64icdaq26uu1zzbyr5hcbh"	true	"Khruner"	false	false						 | ||||
| 819091844	2018-01-07 10:42:05	56237369	"User:Editingaccount1994/sandbox"	2	false	32794215	"[[WP:AES|←]]Created page with '{{User sandbox}} <!-- EDIT BELOW THIS LINE --> {{voir homonymes|Chevalier}} {{Infobox Artiste  | nom                = Li Chevalier  | autres noms        =   | im...'"	27840		"0fyvyh2a8xu41gt8obr34oba0bfixj6"	false	"Editingaccount1994"	false	false						 | ||||
| 819093984	2018-01-07 11:09:52	56237369	"User:Editingaccount1994/sandbox"	2	false	7611264	"[[User:AnomieBOT/docs/TemplateSubster|Substing templates]]: {{Lien web}}. See [[User:AnomieBOT/docs/TemplateSubster]] for info."	27787		"8gy52aolt5rg3eaketwj5v7eiw0apv2"	true	"AnomieBOT"	false	false						 | ||||
| 820064189	2018-01-12 21:45:50	56237369	"User:Editingaccount1994/sandbox"	2	false	12406635	"Orphan per [[WP:TFD|TFD outcome]]"	27784		"he8ydemaanxlrpftqxkez8jfpge1fsj"	true	"SporkBot"	false	false						 | ||||
| 820078679	2018-01-12 23:28:11	56237369	"User:Editingaccount1994/sandbox"	2	false	12406635	"Replace template per [[Wikipedia:Templates for discussion/Log/2010 June 13|TFD outcome]]; no change in content"	27783		"0to17w9rth3url8n7gvucdtobybdq5h"	true	"SporkBot"	false	false						 | ||||
| 820078733	2018-01-12 23:28:39	56237369	"User:Editingaccount1994/sandbox"	2	false	12406635	"Replace template per [[Wikipedia:Templates for discussion/Log/2011 February 17|TFD outcome]]; no change in content"	27782		"531dizmmloyxffbkdr5vph7owh921eg"	true	"SporkBot"	false	false						 | ||||
| 820177382	2018-01-13 13:45:33	56237369	"User:Editingaccount1994/sandbox"	2	false	13791031	"translate TestCaseD if you are from tor you need neutral point of view "	27757		"nik9p2u2fuk4yazjxt8ymbicxv5qid9"	false	"Frietjes"	false	false						"TestCaseD" | ||||
| 822038928	2018-01-24 01:35:22	56237369	"User:Editingaccount1994/sandbox"	2	false	2304267	"Removing [[:c:File:Li_Chevalier_Art_Studio.jpg|Li_Chevalier_Art_Studio.jpg]], it has been deleted from Commons by [[:c:User:JuTa|JuTa]] because: [[:c:COM:OTRS|No permission]] since 16 January 2018."	27667		"gwk6pampl8si1v5pv3kwgteg710sfw3"	false	"CommonsDelinker"	false	false						 | ||||
| 819091874	2018-01-07 10:42:20	56237370	"Anita del Rey"	0	false	1368779	"r from alt name"	25		"n4ozbsgle13p9yywtfrz982ccj8woc9"	false	"PamD"	false	false						 | ||||
| 819091883	2018-01-07 10:42:27	56237371	"User talk:119.94.96.157"	3	false	13286072	"Warning [[Special:Contributions/119.94.96.157|119.94.96.157]] - #1"	1274		"ksohnvsbeuzwpl5vb8a3v8m18hva0a7"	false	"ClueBot NG"	false	false						 | ||||
| 819091914	2018-01-07 10:42:50	56237372	"Category:Ohmi Railway"	14	false	677153	"[[WP:AES|←]]Created page with ' [[Category:Railway companies of Japan]] [[Category:Rail transport in Shiga Prefecture]] [[Category:Seibu Group]]'"	113		"je7aw21fedbwyqsyofpisdrynsu7olr"	false	"Underbar dk"	false	false						 | ||||
| 819091968	2018-01-07 10:43:32	56237375	"User talk:92.226.219.222"	3	false	882433	"[[WP:AES|←]]Created page with '{{3rr}}~~~~'"	199		"cpm4tkzcx4hc6irr9ukbi06ogud8dtq"	false	"TastyPoutine"	false	false						 | ||||
| 819094036	2018-01-07 11:10:24	56237375	"User talk:92.226.219.222"	3	false	7611264	"[[User:AnomieBOT/docs/TemplateSubster|Substing templates]]: {{3rr}}. See [[User:AnomieBOT/docs/TemplateSubster]] for info."	1840		"artmfz8b2gxhb3pp8a5p4ksplxqfkpg"	true	"AnomieBOT"	false	false						 | ||||
| 819112363	2018-01-07 14:33:36	56237375	"User talk:92.226.219.222"	3	false	702940	"Your IP address has been blocked from editing because it has been used to [[WP:EVADE|evade a previous block]]. ([[WP:TW|TW]])"	2949		"dn9wj0n8d8pdd5lqe56uw5xamupowr1"	false	"Only"	false	false						 | ||||
| 819092004	2018-01-07 10:44:01	56237376	"User:Dipayanacharya"	2	false	32794237	"Education"	28		"ofueugwatmmn7u73isw732neuza57gk"	false	"Dipayanacharya"	false	false						 | ||||
| 819092390	2018-01-07 10:49:08	56237376	"User:Dipayanacharya"	2	false	32794237	"School"	38		"dsz55xv96ec2uv6w9c1z7c52ipfovbw"	false	"Dipayanacharya"	false	false						 | ||||
| 819092066	2018-01-07 10:44:56	56237378	"BSCIC"	0	false	21516552	"[[WP:AES|←]]Redirected page to [[Bangladesh Small and Cottage Industries Corporation]]"	65		"9ma38hak0ef1ew4fpiutxpnzd8oz1wd"	false	"Vinegarymass911"	false	false						 | ||||
| 819092102	2018-01-07 10:45:21	56237379	"Category:Women government ministers of Yemen"	14	false	754619	"[[WP:AES|←]]Created page with '{{portal|Yemen|Politics}} {{Non-diffusing subcategory|Government ministers of Yemen}}  {{Underpopulated category}}  Category:Women government ministers by nati...'"	285		"4dvakoat58bzyf5hmtthxukt29hip6n"	false	"BrownHairedGirl"	false	false						 | ||||
| 819092135	2018-01-07 10:45:54	56237381	"Talk:List of Morning Glories Characters"	1	false	410898	"[[WP:AES|←]]Created page with '{{WikiProject Fictional characters|class=List|importance=low}} {{Comicsproj|class=List|importance=low}}'"	103		"2sjrxsc7os9k9pg4su2t4rk2j8nn0h7"	false	"PRehse"	false	false						 | ||||
| 819092138	2018-01-07 10:45:56	56237382	"User talk:106.207.126.114"	3	false	13286072	"Warning [[Special:Contributions/106.207.126.114|106.207.126.114]] - #1"	1330		"3y9t5wpk6ur5jhone75rhm4wjf01fgi"	false	"ClueBot NG"	false	false						 | ||||
| 819092495	2018-01-07 10:50:22	56237382	"User talk:106.207.126.114"	3	false	31190506	"Caution: Unconstructive editing on [[List of Baahubali characters]]. ([[WP:TW|TW]])"	2355		"8wvn6vh3isyt0dorpe89lztrburgupe"	false	"HindWIKI"	false	false						 | ||||
|  | ||||
| 
 | 
										
											
[4 file diffs suppressed because they are too large]
27808  test/baseline_output/noargs_ikwiki-20180301-pages-meta-history.tsv  (Normal file)
[5 file diffs suppressed because they are too large]
							| @ -1,27 +1,27 @@ | ||||
| anon	articleid	chev_com	date_time	deleted	editor	editor_id	minor	namespace	revert	reverteds	revid	sha1	text_chars	title	warning	wiki_welcome | ||||
| FALSE	56237363	None	2018-01-07 10:40:58	FALSE	"NinjaRobotPirate"	3742946	FALSE	3	FALSE		819091731	135nz8q6lfam6cojla7azb7k5alx3t3	1141	"User talk:86.139.142.254"	None	None | ||||
| FALSE	56237364	None	2018-01-07 10:41:10	FALSE	"Kavin kavitha"	32792125	FALSE	3	FALSE		819091755	0pwezjc6yopz0smc8al6ogc4fax5bwo	663	"User talk:Kavin kavitha"	None	None | ||||
| FALSE	56237365	None	2018-01-07 10:41:26	FALSE	"Amicable always"	32621254	FALSE	3	FALSE		819091788	sz3t2ap7z8bpkdvdvi195f3i35949bv	399	"User talk:Dr.vivek163"	None	None | ||||
| FALSE	56237366	None	2018-01-07 10:41:31	FALSE	"ClueBot NG"	13286072	FALSE	3	FALSE		819091796	r6s5j8j3iykenrhuhpnkpsmmd71vubf	1260	"User talk:Twistorl"	Warning	welcome to Wikipedia | ||||
| FALSE	56237368	None	2018-01-07 10:41:51	FALSE	"Khruner"	8409334	FALSE	0	FALSE		819091825	tf5qz2yaswx61zrlm9ovxzuhl7r2dc4	2249	"Kom Firin"	None	None | ||||
| FALSE	56237368	None	2018-01-27 12:16:02	FALSE	"Khruner"	8409334	TRUE	0	FALSE		822610647	e6oa4g0qv64icdaq26uu1zzbyr5hcbh	2230	"Kom Firin"	None	None | ||||
| FALSE	56237369	Chevalier, Chevalier	2018-01-07 10:42:05	FALSE	"Editingaccount1994"	32794215	FALSE	2	FALSE		819091844	0fyvyh2a8xu41gt8obr34oba0bfixj6	27840	"User:Editingaccount1994/sandbox"	None	None | ||||
| FALSE	56237369	None	2018-01-07 11:09:52	FALSE	"AnomieBOT"	7611264	TRUE	2	FALSE		819093984	8gy52aolt5rg3eaketwj5v7eiw0apv2	27787	"User:Editingaccount1994/sandbox"	None	None | ||||
| FALSE	56237369	None	2018-01-12 21:45:50	FALSE	"SporkBot"	12406635	TRUE	2	FALSE		820064189	he8ydemaanxlrpftqxkez8jfpge1fsj	27784	"User:Editingaccount1994/sandbox"	None	None | ||||
| FALSE	56237369	None	2018-01-12 23:28:11	FALSE	"SporkBot"	12406635	TRUE	2	FALSE		820078679	0to17w9rth3url8n7gvucdtobybdq5h	27783	"User:Editingaccount1994/sandbox"	None	None | ||||
| FALSE	56237369	None	2018-01-12 23:28:39	FALSE	"SporkBot"	12406635	TRUE	2	FALSE		820078733	531dizmmloyxffbkdr5vph7owh921eg	27782	"User:Editingaccount1994/sandbox"	None	None | ||||
| FALSE	56237369	None	2018-01-13 13:45:33	FALSE	"Frietjes"	13791031	FALSE	2	FALSE		820177382	nik9p2u2fuk4yazjxt8ymbicxv5qid9	27757	"User:Editingaccount1994/sandbox"	None	None | ||||
| FALSE	56237369	Chevalier, Chevalier	2018-01-24 01:35:22	FALSE	"CommonsDelinker"	2304267	FALSE	2	FALSE		822038928	gwk6pampl8si1v5pv3kwgteg710sfw3	27667	"User:Editingaccount1994/sandbox"	None	None | ||||
| FALSE	56237370	None	2018-01-07 10:42:20	FALSE	"PamD"	1368779	FALSE	0	FALSE		819091874	n4ozbsgle13p9yywtfrz982ccj8woc9	25	"Anita del Rey"	None	None | ||||
| FALSE	56237371	None	2018-01-07 10:42:27	FALSE	"ClueBot NG"	13286072	FALSE	3	FALSE		819091883	ksohnvsbeuzwpl5vb8a3v8m18hva0a7	1274	"User talk:119.94.96.157"	Warning	welcome to Wikipedia | ||||
| FALSE	56237372	None	2018-01-07 10:42:50	FALSE	"Underbar dk"	677153	FALSE	14	FALSE		819091914	je7aw21fedbwyqsyofpisdrynsu7olr	113	"Category:Ohmi Railway"	None	None | ||||
| FALSE	56237375	None	2018-01-07 10:43:32	FALSE	"TastyPoutine"	882433	FALSE	3	FALSE		819091968	cpm4tkzcx4hc6irr9ukbi06ogud8dtq	199	"User talk:92.226.219.222"	None	None | ||||
| FALSE	56237375	None	2018-01-07 11:10:24	FALSE	"AnomieBOT"	7611264	TRUE	3	FALSE		819094036	artmfz8b2gxhb3pp8a5p4ksplxqfkpg	1840	"User talk:92.226.219.222"	None	None | ||||
| FALSE	56237375	None	2018-01-07 14:33:36	FALSE	"Only"	702940	FALSE	3	FALSE		819112363	dn9wj0n8d8pdd5lqe56uw5xamupowr1	2949	"User talk:92.226.219.222"	None	None | ||||
| FALSE	56237376	None	2018-01-07 10:44:01	FALSE	"Dipayanacharya"	32794237	FALSE	2	FALSE		819092004	ofueugwatmmn7u73isw732neuza57gk	28	"User:Dipayanacharya"	None	None | ||||
| FALSE	56237376	None	2018-01-07 10:49:08	FALSE	"Dipayanacharya"	32794237	FALSE	2	FALSE		819092390	dsz55xv96ec2uv6w9c1z7c52ipfovbw	38	"User:Dipayanacharya"	None	None | ||||
| FALSE	56237378	None	2018-01-07 10:44:56	FALSE	"Vinegarymass911"	21516552	FALSE	0	FALSE		819092066	9ma38hak0ef1ew4fpiutxpnzd8oz1wd	65	"BSCIC"	None	None | ||||
| FALSE	56237379	None	2018-01-07 10:45:21	FALSE	"BrownHairedGirl"	754619	FALSE	14	FALSE		819092102	4dvakoat58bzyf5hmtthxukt29hip6n	285	"Category:Women government ministers of Yemen"	None	None | ||||
| FALSE	56237381	None	2018-01-07 10:45:54	FALSE	"PRehse"	410898	FALSE	1	FALSE		819092135	2sjrxsc7os9k9pg4su2t4rk2j8nn0h7	103	"Talk:List of Morning Glories Characters"	None	None | ||||
| FALSE	56237382	None	2018-01-07 10:45:56	FALSE	"ClueBot NG"	13286072	FALSE	3	FALSE		819092138	3y9t5wpk6ur5jhone75rhm4wjf01fgi	1330	"User talk:106.207.126.114"	Warning	welcome to Wikipedia | ||||
| FALSE	56237382	None	2018-01-07 10:50:22	FALSE	"HindWIKI"	31190506	FALSE	3	FALSE		819092495	8wvn6vh3isyt0dorpe89lztrburgupe	2355	"User talk:106.207.126.114"	None	welcome to Wikipedia | ||||
| anon	articleid	chev_com	date_time	deleted	editor	editorid	minor	namespace	revert	reverteds	revid	sha1	text_chars	title	warning	wiki_welcome | ||||
| FALSE	56237363		2018-01-07 10:40:58	FALSE	"NinjaRobotPirate"	3742946	FALSE	3	FALSE		819091731	135nz8q6lfam6cojla7azb7k5alx3t3	1141	"User talk:86.139.142.254"		 | ||||
| FALSE	56237364		2018-01-07 10:41:10	FALSE	"Kavin kavitha"	32792125	FALSE	3	FALSE		819091755	0pwezjc6yopz0smc8al6ogc4fax5bwo	663	"User talk:Kavin kavitha"		 | ||||
| FALSE	56237365		2018-01-07 10:41:26	FALSE	"Amicable always"	32621254	FALSE	3	FALSE		819091788	sz3t2ap7z8bpkdvdvi195f3i35949bv	399	"User talk:Dr.vivek163"		 | ||||
| FALSE	56237366		2018-01-07 10:41:31	FALSE	"ClueBot NG"	13286072	FALSE	3	FALSE		819091796	r6s5j8j3iykenrhuhpnkpsmmd71vubf	1260	"User talk:Twistorl"	Warning	welcome to Wikipedia | ||||
| FALSE	56237368		2018-01-07 10:41:51	FALSE	"Khruner"	8409334	FALSE	0	FALSE		819091825	tf5qz2yaswx61zrlm9ovxzuhl7r2dc4	2249	"Kom Firin"		 | ||||
| FALSE	56237368		2018-01-27 12:16:02	FALSE	"Khruner"	8409334	TRUE	0	FALSE		822610647	e6oa4g0qv64icdaq26uu1zzbyr5hcbh	2230	"Kom Firin"		 | ||||
| FALSE	56237369	Chevalier, Chevalier	2018-01-07 10:42:05	FALSE	"Editingaccount1994"	32794215	FALSE	2	FALSE		819091844	0fyvyh2a8xu41gt8obr34oba0bfixj6	27840	"User:Editingaccount1994/sandbox"		 | ||||
| FALSE	56237369		2018-01-07 11:09:52	FALSE	"AnomieBOT"	7611264	TRUE	2	FALSE		819093984	8gy52aolt5rg3eaketwj5v7eiw0apv2	27787	"User:Editingaccount1994/sandbox"		 | ||||
| FALSE	56237369		2018-01-12 21:45:50	FALSE	"SporkBot"	12406635	TRUE	2	FALSE		820064189	he8ydemaanxlrpftqxkez8jfpge1fsj	27784	"User:Editingaccount1994/sandbox"		 | ||||
| FALSE	56237369		2018-01-12 23:28:11	FALSE	"SporkBot"	12406635	TRUE	2	FALSE		820078679	0to17w9rth3url8n7gvucdtobybdq5h	27783	"User:Editingaccount1994/sandbox"		 | ||||
| FALSE	56237369		2018-01-12 23:28:39	FALSE	"SporkBot"	12406635	TRUE	2	FALSE		820078733	531dizmmloyxffbkdr5vph7owh921eg	27782	"User:Editingaccount1994/sandbox"		 | ||||
| FALSE	56237369		2018-01-13 13:45:33	FALSE	"Frietjes"	13791031	FALSE	2	FALSE		820177382	nik9p2u2fuk4yazjxt8ymbicxv5qid9	27757	"User:Editingaccount1994/sandbox"		 | ||||
| FALSE	56237369	Chevalier, Chevalier	2018-01-24 01:35:22	FALSE	"CommonsDelinker"	2304267	FALSE	2	FALSE		822038928	gwk6pampl8si1v5pv3kwgteg710sfw3	27667	"User:Editingaccount1994/sandbox"		 | ||||
| FALSE	56237370		2018-01-07 10:42:20	FALSE	"PamD"	1368779	FALSE	0	FALSE		819091874	n4ozbsgle13p9yywtfrz982ccj8woc9	25	"Anita del Rey"		 | ||||
| FALSE	56237371		2018-01-07 10:42:27	FALSE	"ClueBot NG"	13286072	FALSE	3	FALSE		819091883	ksohnvsbeuzwpl5vb8a3v8m18hva0a7	1274	"User talk:119.94.96.157"	Warning	welcome to Wikipedia | ||||
| FALSE	56237372		2018-01-07 10:42:50	FALSE	"Underbar dk"	677153	FALSE	14	FALSE		819091914	je7aw21fedbwyqsyofpisdrynsu7olr	113	"Category:Ohmi Railway"		 | ||||
| FALSE	56237375		2018-01-07 10:43:32	FALSE	"TastyPoutine"	882433	FALSE	3	FALSE		819091968	cpm4tkzcx4hc6irr9ukbi06ogud8dtq	199	"User talk:92.226.219.222"		 | ||||
| FALSE	56237375		2018-01-07 11:10:24	FALSE	"AnomieBOT"	7611264	TRUE	3	FALSE		819094036	artmfz8b2gxhb3pp8a5p4ksplxqfkpg	1840	"User talk:92.226.219.222"		 | ||||
| FALSE	56237375		2018-01-07 14:33:36	FALSE	"Only"	702940	FALSE	3	FALSE		819112363	dn9wj0n8d8pdd5lqe56uw5xamupowr1	2949	"User talk:92.226.219.222"		 | ||||
| FALSE	56237376		2018-01-07 10:44:01	FALSE	"Dipayanacharya"	32794237	FALSE	2	FALSE		819092004	ofueugwatmmn7u73isw732neuza57gk	28	"User:Dipayanacharya"		 | ||||
| FALSE	56237376		2018-01-07 10:49:08	FALSE	"Dipayanacharya"	32794237	FALSE	2	FALSE		819092390	dsz55xv96ec2uv6w9c1z7c52ipfovbw	38	"User:Dipayanacharya"		 | ||||
| FALSE	56237378		2018-01-07 10:44:56	FALSE	"Vinegarymass911"	21516552	FALSE	0	FALSE		819092066	9ma38hak0ef1ew4fpiutxpnzd8oz1wd	65	"BSCIC"		 | ||||
| FALSE	56237379		2018-01-07 10:45:21	FALSE	"BrownHairedGirl"	754619	FALSE	14	FALSE		819092102	4dvakoat58bzyf5hmtthxukt29hip6n	285	"Category:Women government ministers of Yemen"		 | ||||
| FALSE	56237381		2018-01-07 10:45:54	FALSE	"PRehse"	410898	FALSE	1	FALSE		819092135	2sjrxsc7os9k9pg4su2t4rk2j8nn0h7	103	"Talk:List of Morning Glories Characters"		 | ||||
| FALSE	56237382		2018-01-07 10:45:56	FALSE	"ClueBot NG"	13286072	FALSE	3	FALSE		819092138	3y9t5wpk6ur5jhone75rhm4wjf01fgi	1330	"User talk:106.207.126.114"	Warning	welcome to Wikipedia | ||||
| FALSE	56237382		2018-01-07 10:50:22	FALSE	"HindWIKI"	31190506	FALSE	3	FALSE		819092495	8wvn6vh3isyt0dorpe89lztrburgupe	2355	"User talk:106.207.126.114"		welcome to Wikipedia | ||||
|  | ||||
| 
 | 
										
											
[3 file diffs suppressed because they are too large]
							
								
								
									
723  wikiq
							| @ -6,23 +6,34 @@ | ||||
| 
 | ||||
| import argparse | ||||
| import sys | ||||
| import os, os.path | ||||
| import os.path | ||||
| import re | ||||
| from io import TextIOWrapper | ||||
| from itertools import groupby | ||||
| 
 | ||||
| from subprocess import Popen, PIPE | ||||
| from collections import deque | ||||
| from hashlib import sha1 | ||||
| from typing import Any, IO, TextIO, Generator, Union | ||||
| 
 | ||||
| import mwxml | ||||
| from mwxml import Dump | ||||
| 
 | ||||
| from deltas.tokenizers import wikitext_split | ||||
| import mwpersistence | ||||
| import mwreverts | ||||
| from urllib.parse import quote | ||||
| 
 | ||||
| import tables | ||||
| from tables import RevisionTable | ||||
| 
 | ||||
| TO_ENCODE = ('title', 'editor') | ||||
| PERSISTENCE_RADIUS=7 | ||||
| from deltas import SequenceMatcher | ||||
| from deltas import SegmentMatcher | ||||
| PERSISTENCE_RADIUS = 7 | ||||
| from deltas import SequenceMatcher, SegmentMatcher | ||||
| 
 | ||||
| import pyarrow as pa | ||||
| import pyarrow.parquet as pq | ||||
| import pyarrow.csv as pacsv | ||||
| 
 | ||||
| 
 | ||||
| class PersistMethod: | ||||
|     none = 0 | ||||
| @ -30,53 +41,67 @@ class PersistMethod: | ||||
|     segment = 2 | ||||
|     legacy = 3 | ||||
| 
 | ||||
| 
 | ||||
| def calculate_persistence(tokens_added): | ||||
|     return(sum([(len(x.revisions)-1) for x in tokens_added]), | ||||
|            len(tokens_added)) | ||||
|     return (sum([(len(x.revisions) - 1) for x in tokens_added]), | ||||
|             len(tokens_added)) | ||||
| 
 | ||||
| 
 | ||||
| class WikiqIterator(): | ||||
| def fix_hex_digests(revs: list[mwxml.Revision]) -> list[mwxml.Revision]: | ||||
|     i = 0 | ||||
|     for rev in revs: | ||||
|         if rev.text is None: | ||||
|             rev.text = "" | ||||
|         if not rev.sha1 and not rev.deleted.text: | ||||
|             rev.sha1 = sha1(bytes(rev.text, "utf8")).hexdigest() | ||||
|         revs[i] = rev | ||||
|         i+=1 | ||||
|     return revs | ||||
| 
 | ||||
| 
 | ||||
| class WikiqIterator: | ||||
|     def __init__(self, fh, collapse_user=False): | ||||
|         self.fh = fh | ||||
|         self.collapse_user = collapse_user | ||||
|         self.mwiterator = Dump.from_file(self.fh) | ||||
|         self.namespace_map = { ns.id : ns.name for ns in | ||||
|                                self.mwiterator.site_info.namespaces } | ||||
|         self.__pages = self.load_pages() | ||||
|         self.namespace_map = {ns.id: ns.name for ns in | ||||
|                               self.mwiterator.site_info.namespaces} | ||||
|         self.__pages: Generator[WikiqPage] = self.load_pages() | ||||
| 
 | ||||
|     def load_pages(self): | ||||
|         for page in self.mwiterator: | ||||
|             yield WikiqPage(page, | ||||
|                             namespace_map = self.namespace_map, | ||||
|                             namespace_map=self.namespace_map, | ||||
|                             collapse_user=self.collapse_user) | ||||
| 
 | ||||
|     def __iter__(self): | ||||
|         return self.__pages | ||||
| 
 | ||||
|     def __next__(self): | ||||
|         return next(self._pages) | ||||
|         return next(self.__pages) | ||||
| 
 | ||||
| class WikiqPage(): | ||||
|     __slots__ = ('id', 'title', 'namespace', 'redirect', | ||||
| 
 | ||||
| class WikiqPage: | ||||
|     __slots__ = ('id', 'redirect', | ||||
|                  'restrictions', 'mwpage', '__revisions', | ||||
|                  'collapse_user') | ||||
|      | ||||
| 
 | ||||
|     def __init__(self, page, namespace_map, collapse_user=False): | ||||
|         self.id = page.id | ||||
|         self.namespace = page.namespace | ||||
|         # following mwxml, we assume namespace 0 in cases where | ||||
|         # page.namespace is inconsistent with namespace_map | ||||
|         if page.namespace not in namespace_map: | ||||
|             self.title = page.title | ||||
|             page.namespace = 0 | ||||
|         if page.namespace != 0: | ||||
|             self.title = ':'.join([namespace_map[page.namespace], page.title]) | ||||
|         else: | ||||
|             self.title = page.title | ||||
|             page.title = ':'.join([namespace_map[page.namespace], page.title]) | ||||
|         self.restrictions = page.restrictions | ||||
|         self.collapse_user = collapse_user | ||||
|         self.mwpage = page | ||||
|         self.__revisions = self.rev_list() | ||||
|         self.__revisions: Generator[list[mwxml.Revision]] = self.rev_list() | ||||
| 
 | ||||
|     @staticmethod | ||||
|     def user_text(rev) -> Union[str, None]: | ||||
|         return None if rev.deleted.user else rev.user.text | ||||
| 
 | ||||
|     def rev_list(self): | ||||
|         # Outline for how we want to handle collapse_user=True | ||||
| @ -87,39 +112,16 @@ class WikiqPage(): | ||||
|         #         3          A               B            True | ||||
|         #         4          A               A           False | ||||
|         # Post-loop                          A          Always | ||||
|         for i, rev in enumerate(self.mwpage): | ||||
|             # never yield the first time | ||||
|             if i == 0: | ||||
|                 if self.collapse_user:  | ||||
|                     collapsed_revs = 1 | ||||
|                     rev.collapsed_revs = collapsed_revs | ||||
| 
 | ||||
|             else: | ||||
|                 if self.collapse_user: | ||||
|                     # yield if this is the last edit in a seq by a user and reset | ||||
|                     # also yield if we do know who the user is | ||||
|         if not self.collapse_user: | ||||
|             for rev in self.mwpage: | ||||
|                 yield [rev] | ||||
|             return | ||||
| 
 | ||||
|                     if rev.deleted.user or prev_rev.deleted.user: | ||||
|                         yield prev_rev | ||||
|                         collapsed_revs = 1 | ||||
|                         rev.collapsed_revs = collapsed_revs | ||||
| 
 | ||||
|                     elif not rev.user.text == prev_rev.user.text: | ||||
|                         yield prev_rev | ||||
|                         collapsed_revs = 1 | ||||
|                         rev.collapsed_revs = collapsed_revs | ||||
|                     # otherwise, add one to the counter | ||||
|                     else: | ||||
|                         collapsed_revs += 1 | ||||
|                         rev.collapsed_revs = collapsed_revs | ||||
|                 # if collapse_user is false, we always yield | ||||
|                 else: | ||||
|                     yield prev_rev | ||||
| 
 | ||||
|             prev_rev = rev | ||||
| 
 | ||||
|         # also yield the final time | ||||
|         yield prev_rev | ||||
|         for _, revs in groupby(self.mwpage, self.user_text): | ||||
|             # All revisions are either from the same user, or this is a single | ||||
|             # revision where the user is missing. | ||||
|             yield list(revs) | ||||
| 
 | ||||
|     def __iter__(self): | ||||
|         return self.__revisions | ||||
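The rewritten rev_list above relies on itertools.groupby, which only merges consecutive revisions whose key (here, the editor's user text) compares equal, so an editor who returns after someone else's edit starts a new group. A minimal sketch of that behavior, using hypothetical (editor, revid) tuples in place of mwxml revision objects:

    from itertools import groupby

    # Hypothetical (editor, revid) pairs standing in for the revisions of one page.
    revs = [("Alice", 1), ("Alice", 2), ("Bob", 3), ("Alice", 4)]

    # groupby() only collapses *consecutive* items with the same key, so each
    # group is one contiguous "edit session" rather than all of an editor's edits.
    sessions = [list(group) for _, group in groupby(revs, key=lambda rev: rev[0])]
    print(sessions)
    # [[('Alice', 1), ('Alice', 2)], [('Bob', 3)], [('Alice', 4)]]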
| @ -128,6 +130,13 @@ class WikiqPage(): | ||||
|         return next(self.__revisions) | ||||
| 
 | ||||
| 
 | ||||
| """ | ||||
| A RegexPair is defined by a regular expression (pattern) and a label. | ||||
| The pattern can include capture groups.  If it does then each capture group will have a resulting column in the output. | ||||
| If the pattern does not include a capture group, then only one output column will result. | ||||
| """ | ||||
| 
 | ||||
| 
 | ||||
| class RegexPair(object): | ||||
|     def __init__(self, pattern, label): | ||||
|         self.pattern = re.compile(pattern) | ||||
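As the comment block above explains, each RegexPair maps a pattern/label pair onto output columns: one column per named capture group (keyed label_group, as in _make_key just below), or a single column named after the label when the pattern has no groups. An illustrative sketch of that mapping, with made-up patterns and labels rather than anything taken from a real wikiq run:

    import re

    # Pattern WITH named capture groups: one output column per group,
    # keyed "<label>_<group>" (the label "date" is made up for illustration).
    pat = re.compile(r"(?P<year>\d{4})-(?P<month>\d{2})")
    print(["{}_{}".format("date", group) for group in pat.groupindex])
    # ['date_year', 'date_month']

    # Pattern WITHOUT capture groups: a single column named after the label,
    # holding the comma-joined matches, or None when nothing matches.
    pat = re.compile(r"TFD")
    comment = "Replace template per [[WP:TFD|TFD outcome]]"
    matches = pat.findall(comment)
    print({"tfd": ", ".join(matches) if matches else None})
    # {'tfd': 'TFD, TFD'}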
| @ -135,12 +144,20 @@ class RegexPair(object): | ||||
|         self.has_groups = bool(self.pattern.groupindex) | ||||
|         if self.has_groups: | ||||
|             self.capture_groups = list(self.pattern.groupindex.keys()) | ||||
|              | ||||
|     def _make_key(self, cap_group): | ||||
|         return ("{}_{}".format(self.label, cap_group)) | ||||
| 
 | ||||
|     def matchmake(self, content, rev_data): | ||||
|          | ||||
|     def get_pyarrow_fields(self): | ||||
|         if self.has_groups: | ||||
|             fields = [pa.field(self._make_key(cap_group), pa.string()) | ||||
|                       for cap_group in self.capture_groups] | ||||
|         else: | ||||
|             fields = [pa.field(self.label, pa.string())] | ||||
| 
 | ||||
|         return fields | ||||
| 
 | ||||
|     def _make_key(self, cap_group): | ||||
|         return "{}_{}".format(self.label, cap_group) | ||||
| 
 | ||||
|     def matchmake(self, content: str) -> dict: | ||||
|         temp_dict = {} | ||||
|         # if there are named capture groups in the regex | ||||
|         if self.has_groups: | ||||
| @ -155,11 +172,11 @@ class RegexPair(object): | ||||
|                     temp_list = [] | ||||
|                     for match in matchobjects: | ||||
|                         # we only want to add the match for the capture group if the match is not None | ||||
|                         if match.group(cap_group) != None: | ||||
|                         if match.group(cap_group) is not None: | ||||
|                             temp_list.append(match.group(cap_group)) | ||||
| 
 | ||||
|                     # if temp_list of matches is empty just make that column None | ||||
|                     if len(temp_list)==0: | ||||
|                     if len(temp_list) == 0: | ||||
|                         temp_dict[key] = None | ||||
|                     # else we put in the list we made in the for-loop above | ||||
|                     else: | ||||
| @ -173,30 +190,40 @@ class RegexPair(object): | ||||
| 
 | ||||
|         # there are no capture groups, we just search for all the matches of the regex | ||||
|         else: | ||||
|             #given that there are matches to be made | ||||
|             if self.pattern.search(content) is not None: | ||||
|                 m = self.pattern.findall(content) | ||||
|                 temp_dict[self.label] = ', '.join(m) | ||||
|             else: | ||||
|                 temp_dict[self.label] = None     | ||||
|         # update rev_data with our new columns | ||||
|         rev_data.update(temp_dict) | ||||
|         return rev_data | ||||
|             # given that there are matches to be made | ||||
|             if type(content) in (str, bytes): | ||||
|                 if self.pattern.search(content) is not None: | ||||
|                     m = self.pattern.findall(content) | ||||
|                     temp_dict[self.label] = ', '.join(m) | ||||
|                 else: | ||||
|                     temp_dict[self.label] = None | ||||
| 
 | ||||
|          | ||||
| class WikiqParser(): | ||||
|     def __init__(self, input_file, output_file, regex_match_revision, regex_match_comment, regex_revision_label, regex_comment_label, collapse_user=False, persist=None, urlencode=False, namespaces = None, revert_radius=15): | ||||
|         return temp_dict | ||||
| 
 | ||||
| 
 | ||||
| class WikiqParser: | ||||
|     def __init__(self, | ||||
|                  input_file: Union[TextIOWrapper, IO[Any], IO[bytes]], | ||||
|                  output_file: Union[TextIO, str], | ||||
|                  regex_match_revision: list[str], | ||||
|                  regex_match_comment: list[str], | ||||
|                  regex_revision_label: list[str], | ||||
|                  regex_comment_label: list[str], | ||||
|                  collapse_user: bool = False, | ||||
|                  persist: int = None, | ||||
|                  namespaces: Union[list[int], None] = None, | ||||
|                  revert_radius: int = 15, | ||||
|                  output_parquet: bool = True, | ||||
|                  parquet_buffer_size: int = 2000): | ||||
|         """  | ||||
|         Parameters: | ||||
|            persist : what persistence method to use. Takes a PersistMethod value | ||||
|         """ | ||||
|         self.input_file = input_file | ||||
|         self.output_file = output_file | ||||
|         self.collapse_user = collapse_user | ||||
|         self.persist = persist | ||||
|         self.printed_header = False | ||||
| 
 | ||||
|         self.collapse_user: bool = collapse_user | ||||
|         self.persist: int = persist | ||||
|         self.namespaces = [] | ||||
|         self.urlencode = urlencode | ||||
|         self.revert_radius = revert_radius | ||||
| 
 | ||||
|         if namespaces is not None: | ||||
| @ -204,41 +231,67 @@ class WikiqParser(): | ||||
|         else: | ||||
|             self.namespace_filter = None | ||||
| 
 | ||||
|         self.regex_revision_pairs = self.make_matchmake_pairs(regex_match_revision, regex_revision_label) | ||||
|         self.regex_comment_pairs = self.make_matchmake_pairs(regex_match_comment, regex_comment_label) | ||||
|          | ||||
|         self.regex_schemas = [] | ||||
|         self.regex_revision_pairs: list[RegexPair] = self.make_matchmake_pairs(regex_match_revision, | ||||
|                                                                                regex_revision_label) | ||||
|         self.regex_comment_pairs: list[RegexPair] = self.make_matchmake_pairs(regex_match_comment, regex_comment_label) | ||||
| 
 | ||||
|     def make_matchmake_pairs(self, patterns, labels): | ||||
|         # here we initialize the variables we need for output. | ||||
|         if output_parquet is True: | ||||
|             self.output_parquet = True | ||||
|             self.pq_writer = None | ||||
|             self.output_file = output_file | ||||
|             self.parquet_buffer = [] | ||||
|             self.parquet_buffer_size = parquet_buffer_size | ||||
|         else: | ||||
|             self.print_header = True | ||||
|             if output_file == sys.stdout.buffer: | ||||
| 
 | ||||
|                 self.output_file = output_file | ||||
|             else: | ||||
|                 self.output_file = open(output_file, 'wb') | ||||
|             self.output_parquet = False | ||||
| 
 | ||||
|     def make_matchmake_pairs(self, patterns, labels) -> list[RegexPair]: | ||||
|         if (patterns is not None and labels is not None) and \ | ||||
|            (len(patterns) == len(labels)): | ||||
|             return [RegexPair(pattern, label) for pattern, label in zip(patterns, labels)] | ||||
|         elif (patterns is None and labels is None): | ||||
|                 (len(patterns) == len(labels)): | ||||
|             result: list[RegexPair] = [] | ||||
|             for pattern, label in zip(patterns, labels): | ||||
|                 rp = RegexPair(pattern, label) | ||||
|                 result.append(rp) | ||||
|                 self.regex_schemas = self.regex_schemas + rp.get_pyarrow_fields() | ||||
|             return result | ||||
|         elif (patterns is None) and (labels is None): | ||||
|             return [] | ||||
|         else: | ||||
|             sys.exit('Each regular expression *must* come with a corresponding label and vice versa.') | ||||
| 
 | ||||
|     def matchmake(self, rev, rev_data): | ||||
|         rev_data = self.matchmake_revision(rev.text, rev_data) | ||||
|         rev_data = self.matchmake_comment(rev.comment, rev_data) | ||||
|         return rev_data | ||||
|     def matchmake_revision(self, rev: mwxml.Revision): | ||||
|         result = self.matchmake_text(rev.text) | ||||
|         for k, v in self.matchmake_comment(rev.comment).items(): | ||||
|             result[k] = v | ||||
|         return result | ||||
| 
 | ||||
|     def matchmake_revision(self, text, rev_data): | ||||
|         return self.matchmake_pairs(text, rev_data, self.regex_revision_pairs) | ||||
|     def matchmake_text(self, text: str): | ||||
|         return self.matchmake_pairs(text, self.regex_revision_pairs) | ||||
| 
 | ||||
|     def matchmake_comment(self, comment, rev_data): | ||||
|         return self.matchmake_pairs(comment, rev_data, self.regex_comment_pairs) | ||||
|     def matchmake_comment(self, comment: str): | ||||
|         return self.matchmake_pairs(comment, self.regex_comment_pairs) | ||||
| 
 | ||||
|     def matchmake_pairs(self, text, rev_data, pairs): | ||||
|     @staticmethod | ||||
|     def matchmake_pairs(text, pairs): | ||||
|         result = {} | ||||
|         for pair in pairs: | ||||
|             rev_data = pair.matchmake(text, rev_data) | ||||
|         return rev_data | ||||
|             for k, v in pair.matchmake(text).items(): | ||||
|                 result[k] = v | ||||
|         return result | ||||
| 
 | ||||
|     def __get_namespace_from_title(self, title): | ||||
|         default_ns = None | ||||
| 
 | ||||
|         for ns in self.namespaces: | ||||
|             # skip if the namespace is not defined | ||||
|             if ns == None: | ||||
|             if ns is None: | ||||
|                 default_ns = self.namespaces[ns] | ||||
|                 continue | ||||
| 
 | ||||
| @ -248,7 +301,6 @@ class WikiqParser(): | ||||
|         # if we've made it this far with no matches, we return the default namespace | ||||
|         return default_ns | ||||
| 
 | ||||
| 
 | ||||
|     def process(self): | ||||
| 
 | ||||
|         # create a regex that creates the output filename | ||||
| @ -259,315 +311,346 @@ class WikiqParser(): | ||||
|         # Construct dump file iterator | ||||
|         dump = WikiqIterator(self.input_file, collapse_user=self.collapse_user) | ||||
| 
 | ||||
|         # extract list of namspaces | ||||
|         self.namespaces = {ns.name : ns.id for ns in dump.mwiterator.site_info.namespaces} | ||||
|         reverts_column = tables.RevisionReverts() | ||||
| 
 | ||||
|         table = RevisionTable([ | ||||
|             tables.RevisionId(), | ||||
|             tables.RevisionTimestamp(), | ||||
|             tables.RevisionArticleId(), | ||||
|             tables.RevisionPageTitle(), | ||||
|             tables.RevisionNamespace(), | ||||
|             tables.RevisionDeleted(), | ||||
|             tables.RevisionEditorId(), | ||||
|             tables.RevisionEditSummary(), | ||||
|             tables.RevisionTextChars(), | ||||
|             reverts_column, | ||||
|             tables.RevisionSha1(), | ||||
|             tables.RevisionIsMinor(), | ||||
|             tables.RevisionEditorText(), | ||||
|             tables.RevisionIsAnon(), | ||||
|         ]) | ||||
| 
 | ||||
|         if self.collapse_user: | ||||
|             table.columns.append(tables.RevisionCollapsed()) | ||||
| 
 | ||||
|         # extract list of namespaces | ||||
|         self.namespaces = {ns.name: ns.id for ns in dump.mwiterator.site_info.namespaces} | ||||
| 
 | ||||
|         page_count = 0 | ||||
|         rev_count = 0 | ||||
| 
 | ||||
|         writer: Union[pq.ParquetWriter, pacsv.CSVWriter] | ||||
| 
 | ||||
|         schema = table.schema() | ||||
|         schema = schema.append(pa.field('revert', pa.bool_(), nullable=True)) | ||||
| 
 | ||||
|         # Add regex fields to the schema. | ||||
|         for pair in self.regex_revision_pairs: | ||||
|             for field in pair.get_pyarrow_fields(): | ||||
|                 schema = schema.append(field) | ||||
| 
 | ||||
|         for pair in self.regex_comment_pairs: | ||||
|             for field in pair.get_pyarrow_fields(): | ||||
|                 schema = schema.append(field) | ||||
| 
 | ||||
|         if self.persist != PersistMethod.none: | ||||
|             table.columns.append(tables.RevisionText()) | ||||
|             schema = schema.append(pa.field('token_revs', pa.int64(), nullable=True)) | ||||
|             schema = schema.append(pa.field('tokens_added', pa.int64(), nullable=True)) | ||||
|             schema = schema.append(pa.field('tokens_removed', pa.int64(), nullable=True)) | ||||
|             schema = schema.append(pa.field('tokens_window', pa.int64(), nullable=True)) | ||||
| 
 | ||||
|         if self.output_parquet: | ||||
|             writer = pq.ParquetWriter(self.output_file, schema, flavor='spark') | ||||
|         else: | ||||
|             writer = pacsv.CSVWriter(self.output_file, schema, write_options=pacsv.WriteOptions(delimiter='\t')) | ||||
| 
 | ||||
|         regex_matches = {} | ||||
| 
 | ||||
|         # Iterate through pages | ||||
|         for page in dump: | ||||
|             namespace = page.namespace if page.namespace is not None else self.__get_namespace_from_title(page.title) | ||||
| 
 | ||||
|             # skip namespaces not in the filter | ||||
|             if self.namespace_filter is not None: | ||||
|                 if namespace not in self.namespace_filter: | ||||
|                 if page.mwpage.namespace not in self.namespace_filter: | ||||
|                     continue | ||||
| 
 | ||||
|             rev_detector = mwreverts.Detector(radius = self.revert_radius) | ||||
|             # Disable detecting reverts if radius is 0. | ||||
|             if self.revert_radius > 0: | ||||
|                 reverts_column.rev_detector = mwreverts.Detector(radius=self.revert_radius) | ||||
|             else: | ||||
|                 reverts_column.rev_detector = None | ||||
| 
 | ||||
|             # Iterate through a page's revisions | ||||
|             for revs in page: | ||||
|                 # Revisions may or may not be grouped into lists of contiguous revisions by the | ||||
|                 # same user. We call these "edit sessions". Otherwise revs is a list containing | ||||
|                 # exactly one revision. | ||||
|                 revs = list(revs) | ||||
|                 revs = fix_hex_digests(revs) | ||||
| 
 | ||||
|                 table.add(page.mwpage, revs) | ||||
| 
 | ||||
|                 # if re.match(r'^#redirect \[\[.*\]\]', rev.text, re.I): | ||||
|                 #    redirect = True | ||||
|                 # else: | ||||
|                 #    redirect = False | ||||
| 
 | ||||
|                 # TODO missing: additions_size deletions_size | ||||
| 
 | ||||
|                 rev_count += 1 | ||||
| 
 | ||||
|                 # Get the last revision in the edit session. | ||||
|                 rev = revs[-1] | ||||
|                 regex_dict = self.matchmake_revision(rev) | ||||
|                 for k, v in regex_dict.items(): | ||||
|                     if regex_matches.get(k) is None: | ||||
|                         regex_matches[k] = [] | ||||
|                     regex_matches[k].append(v) | ||||
| 
 | ||||
|             # Collect the set of pages currently buffered in the table so we can run multi-page functions on them. | ||||
|             row_buffer = table.pop() | ||||
| 
 | ||||
|             is_revert_column: list[Union[bool, None]] = [] | ||||
|             for r, d in zip(row_buffer['reverteds'], row_buffer['deleted']): | ||||
|                 if self.revert_radius == 0 or d: | ||||
|                     is_revert_column.append(None) | ||||
|                 else: | ||||
|                     is_revert_column.append(r is not None) | ||||
| 
 | ||||
|             row_buffer['revert'] = is_revert_column | ||||
| 
 | ||||
|             for k, v in regex_matches.items(): | ||||
|                 row_buffer[k] = v | ||||
|                 regex_matches = {} | ||||
| 
 | ||||
|             if self.persist != PersistMethod.none: | ||||
|                 window = deque(maxlen=PERSISTENCE_RADIUS) | ||||
| 
 | ||||
|                 row_buffer['token_revs'] = [] | ||||
|                 row_buffer['tokens_added'] = [] | ||||
|                 row_buffer['tokens_removed'] = [] | ||||
|                 row_buffer['tokens_window'] = [] | ||||
| 
 | ||||
|                 if self.persist == PersistMethod.sequence: | ||||
|                     state = mwpersistence.DiffState(SequenceMatcher(tokenizer = wikitext_split), | ||||
|                     state = mwpersistence.DiffState(SequenceMatcher(tokenizer=wikitext_split), | ||||
|                                                     revert_radius=PERSISTENCE_RADIUS) | ||||
| 
 | ||||
|                 elif self.persist == PersistMethod.segment: | ||||
|                     state = mwpersistence.DiffState(SegmentMatcher(tokenizer = wikitext_split), | ||||
|                     state = mwpersistence.DiffState(SegmentMatcher(tokenizer=wikitext_split), | ||||
|                                                     revert_radius=PERSISTENCE_RADIUS) | ||||
| 
 | ||||
|                 # self.persist == PersistMethod.legacy | ||||
|                 else: | ||||
|                     from mw.lib import persistence | ||||
|                     state = persistence.State() | ||||
| 
 | ||||
|             # Iterate through a page's revisions | ||||
|             for rev in page: | ||||
|                  | ||||
|                 # initialize rev_data | ||||
|                 rev_data = { | ||||
|                     'revid':rev.id, | ||||
|                     'date_time' : rev.timestamp.strftime('%Y-%m-%d %H:%M:%S'), | ||||
|                     'articleid' : page.id, | ||||
|                     'editor_id' : "" if rev.deleted.user == True or rev.user.id is None else rev.user.id, | ||||
|                     'title' : '"' + page.title + '"', | ||||
|                     'namespace' : namespace, | ||||
|                     'deleted' : "TRUE" if rev.deleted.text else "FALSE" | ||||
|                 } | ||||
| 
 | ||||
|                 rev_data = self.matchmake(rev, rev_data) | ||||
| 
 | ||||
|                 # if revisions are deleted, /many/ things will be missing | ||||
|                 if rev.deleted.text: | ||||
|                     rev_data['text_chars'] = "" | ||||
|                     rev_data['sha1'] = "" | ||||
|                     rev_data['revert'] = "" | ||||
|                     rev_data['reverteds'] = "" | ||||
| 
 | ||||
|                 else: | ||||
|                     # rev.text can be None if the page has no text | ||||
|                     if not rev.text: | ||||
|                         rev.text = "" | ||||
|                     # if text exists, we'll check for a sha1 and generate one otherwise | ||||
| 
 | ||||
|                     if rev.sha1: | ||||
|                         text_sha1 = rev.sha1 | ||||
|                 for idx, text in enumerate(row_buffer['text']): | ||||
|                     rev_id = row_buffer['revid'][idx] | ||||
|                     if self.persist != PersistMethod.legacy: | ||||
|                         _, tokens_added, tokens_removed = state.update(text, rev_id) | ||||
|                     else: | ||||
|                         _, tokens_added, tokens_removed = state.process(text, rev_id) | ||||
| 
 | ||||
|                         text_sha1 = sha1(bytes(rev.text, "utf8")).hexdigest() | ||||
|                      | ||||
|                     rev_data['sha1'] = text_sha1 | ||||
|                     window.append((rev_id, tokens_added, tokens_removed)) | ||||
| 
 | ||||
|                     # TODO rev.bytes doesn't work.. looks like a bug | ||||
|                     rev_data['text_chars'] = len(rev.text) | ||||
|                     if len(window) == PERSISTENCE_RADIUS: | ||||
|                         old_rev_id, old_tokens_added, old_tokens_removed = window.popleft() | ||||
|                         num_token_revs, num_tokens = calculate_persistence(old_tokens_added) | ||||
| 
 | ||||
|                     # generate revert data | ||||
|                     revert = rev_detector.process(text_sha1, rev.id) | ||||
|                      | ||||
|                     if revert: | ||||
|                         rev_data['revert'] = "TRUE" | ||||
|                         rev_data['reverteds'] = '"' + ",".join([str(x) for x in revert.reverteds]) + '"' | ||||
|                     else: | ||||
|                         rev_data['revert'] = "FALSE" | ||||
|                         rev_data['reverteds'] = "" | ||||
|                         row_buffer['token_revs'].append(num_token_revs) | ||||
|                         row_buffer['tokens_added'].append(num_tokens) | ||||
|                         row_buffer['tokens_removed'].append(len(old_tokens_removed)) | ||||
|                         row_buffer['tokens_window'].append(PERSISTENCE_RADIUS - 1) | ||||
| 
 | ||||
|                 # if the fact that the edit was minor can be hidden, this might be an issue | ||||
|                 rev_data['minor'] = "TRUE" if rev.minor else "FALSE" | ||||
|                 del row_buffer['text'] | ||||
| 
 | ||||
|                 if not rev.deleted.user: | ||||
|                     # wrap user-defined editors in quotes for fread | ||||
|                     rev_data['editor'] = '"' + rev.user.text + '"' | ||||
|                     rev_data['anon'] = "TRUE" if rev.user.id == None else "FALSE" | ||||
|                      | ||||
|                 else: | ||||
|                     rev_data['anon'] = "" | ||||
|                     rev_data['editor'] = "" | ||||
| 
 | ||||
|                 #if re.match(r'^#redirect \[\[.*\]\]', rev.text, re.I): | ||||
|                 #    redirect = True | ||||
|                 #else: | ||||
|                 #    redirect = False | ||||
|                  | ||||
|                 #TODO missing: additions_size deletions_size | ||||
|                  | ||||
|                 # if collapse user was on, let's run that | ||||
|                 if self.collapse_user: | ||||
|                     rev_data['collapsed_revs'] = rev.collapsed_revs | ||||
| 
 | ||||
|                 if self.persist != PersistMethod.none: | ||||
|                     if rev.deleted.text: | ||||
|                         for k in ["token_revs", "tokens_added", "tokens_removed", "tokens_window"]: | ||||
|                             old_rev_data[k] = None | ||||
|                     else: | ||||
| 
 | ||||
|                         if self.persist != PersistMethod.legacy: | ||||
|                             _, tokens_added, tokens_removed = state.update(rev.text, rev.id) | ||||
| 
 | ||||
|                         else: | ||||
|                             _, tokens_added, tokens_removed = state.process(rev.text, rev.id, text_sha1) | ||||
|                              | ||||
|                         window.append((rev.id, rev_data, tokens_added, tokens_removed)) | ||||
|                          | ||||
|                         if len(window) == PERSISTENCE_RADIUS: | ||||
|                             old_rev_id, old_rev_data, old_tokens_added, old_tokens_removed = window[0] | ||||
|                              | ||||
|                             num_token_revs, num_tokens = calculate_persistence(old_tokens_added) | ||||
| 
 | ||||
|                             old_rev_data["token_revs"] = num_token_revs | ||||
|                             old_rev_data["tokens_added"] = num_tokens | ||||
|                             old_rev_data["tokens_removed"] = len(old_tokens_removed) | ||||
|                             old_rev_data["tokens_window"] = PERSISTENCE_RADIUS-1 | ||||
| 
 | ||||
|                             self.print_rev_data(old_rev_data) | ||||
| 
 | ||||
|                 else: | ||||
|                     self.print_rev_data(rev_data) | ||||
| 
 | ||||
|                 rev_count += 1 | ||||
| 
 | ||||
|             if self.persist != PersistMethod.none: | ||||
|                 # print out metadata for the last RADIUS revisions | ||||
|                 for i, item in enumerate(window): | ||||
|                     # if the window was full, we've already printed item 0 | ||||
|                     if len(window) == PERSISTENCE_RADIUS and i == 0: | ||||
|                         continue | ||||
| 
 | ||||
|                     rev_id, rev_data, tokens_added, tokens_removed = item | ||||
|                     rev_id, tokens_added, tokens_removed = item | ||||
|                     num_token_revs, num_tokens = calculate_persistence(tokens_added) | ||||
| 
 | ||||
|                     rev_data["token_revs"] = num_token_revs | ||||
|                     rev_data["tokens_added"] = num_tokens | ||||
|                     rev_data["tokens_removed"] = len(tokens_removed) | ||||
|                     rev_data["tokens_window"] = len(window)-(i+1) | ||||
|                      | ||||
|                     self.print_rev_data(rev_data) | ||||
|                     row_buffer['token_revs'].append(num_token_revs) | ||||
|                     row_buffer['tokens_added'].append(num_tokens) | ||||
|                     row_buffer['tokens_removed'].append(len(tokens_removed)) | ||||
|                     row_buffer['tokens_window'].append(len(window) - (i + 1)) | ||||
| 
 | ||||
|             writer.write(pa.table(row_buffer, schema=schema)) | ||||
| 
 | ||||
|             page_count += 1 | ||||
| 
 | ||||
|         print("Done: %s revisions and %s pages." % (rev_count, page_count), | ||||
|               file=sys.stderr) | ||||
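The persistence bookkeeping above only emits statistics for a revision after later revisions have been applied to it. Below is a minimal, self-contained sketch of that sliding-window pattern; the radius value and the measure function are stand-ins chosen for illustration (wikiq's own PERSISTENCE_RADIUS constant and calculate_persistence helper are defined elsewhere and are not shown in this diff).

    from collections import deque

    PERSISTENCE_RADIUS = 7  # assumed value, for illustration only

    def window_persistence(revisions, measure_fn):
        """For each (rev_id, payload), emit (rev_id, measure, window_size) once up to
        PERSISTENCE_RADIUS - 1 later revisions have been seen, mirroring the deque use above."""
        window = deque(maxlen=PERSISTENCE_RADIUS)
        results = []
        for rev_id, payload in revisions:
            window.append((rev_id, payload))
            if len(window) == PERSISTENCE_RADIUS:
                # the oldest buffered revision has now seen PERSISTENCE_RADIUS - 1 successors
                old_id, old_payload = window.popleft()
                results.append((old_id, measure_fn(old_payload), PERSISTENCE_RADIUS - 1))
        # revisions near the end of the page saw fewer later revisions
        for i, (rev_id, payload) in enumerate(window):
            results.append((rev_id, measure_fn(payload), len(window) - (i + 1)))
        return results

    # toy payloads: lists of "added tokens"; the stand-in measure is simply their count
    print(window_persistence([(i, ["tok"] * i) for i in range(10)], len))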
| 
 | ||||
|     def print_rev_data(self, rev_data): | ||||
|         # if it's the first time through, print the header | ||||
|         if self.urlencode: | ||||
|             for field in TO_ENCODE: | ||||
|                 rev_data[field] = quote(str(rev_data[field])) | ||||
| 
 | ||||
|         if not self.printed_header: | ||||
|             print("\t".join([str(k) for k in sorted(rev_data.keys())]), file=self.output_file) | ||||
|             self.printed_header = True | ||||
|          | ||||
|         print("\t".join([str(v) for k, v in sorted(rev_data.items())]), file=self.output_file) | ||||
|         writer.close() | ||||
| 
 | ||||
| 
 | ||||
| def open_input_file(input_filename): | ||||
| def match_archive_suffix(input_filename): | ||||
|     if re.match(r'.*\.7z$', input_filename): | ||||
|         cmd = ["7za", "x", "-so", input_filename, '*']  | ||||
|         cmd = ["7za", "x", "-so", input_filename] | ||||
|     elif re.match(r'.*\.gz$', input_filename): | ||||
|         cmd = ["zcat", input_filename]  | ||||
|         cmd = ["zcat", input_filename] | ||||
|     elif re.match(r'.*\.bz2$', input_filename): | ||||
|         cmd = ["bzcat", "-dk", input_filename]  | ||||
|         cmd = ["bzcat", "-dk", input_filename] | ||||
|     else: | ||||
|         raise ValueError("Unrecognized file type: %s" % input_filename) | ||||
|     return cmd | ||||
| 
 | ||||
| 
 | ||||
| def open_input_file(input_filename, fandom_2020=False): | ||||
|     cmd = match_archive_suffix(input_filename) | ||||
|     if fandom_2020: | ||||
|         cmd.append("*.xml") | ||||
|     try: | ||||
|         input_file = Popen(cmd, stdout=PIPE).stdout | ||||
|         return Popen(cmd, stdout=PIPE).stdout | ||||
|     except NameError: | ||||
|         input_file = open(input_filename, 'r') | ||||
|         return open(input_filename, 'r') | ||||
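A hedged usage sketch of the two helpers above, with hypothetical filenames: match_archive_suffix only chooses the decompression command line, and open_input_file pipes the archive through that command so the parser reads plain XML from the subprocess's stdout; with fandom_2020=True, "*.xml" is appended so 7za extracts only the XML members of multi-file archives.

    # Hypothetical filenames, shown only to illustrate the helpers above.
    print(match_archive_suffix("enwiki-sample.xml.bz2"))
    # ['bzcat', '-dk', 'enwiki-sample.xml.bz2']

    dump_stream = open_input_file("somewiki-fandom.7z", fandom_2020=True)
    first_line = dump_stream.readline()   # bytes read from the decompressor's stdout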
| 
 | ||||
| 
 | ||||
| def get_output_filename(input_filename, parquet=False) -> str: | ||||
|     output_filename = re.sub(r'\.(7z|gz|bz2)?$', '', input_filename) | ||||
|     output_filename = re.sub(r'\.xml', '', output_filename) | ||||
|     if parquet is False: | ||||
|         output_filename = output_filename + ".tsv" | ||||
|     else: | ||||
|         output_filename = output_filename + ".parquet" | ||||
|     return output_filename | ||||
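For example (paths are hypothetical), the helper strips the compression and .xml suffixes and then appends the extension for the chosen output format:

    # Hypothetical paths, for illustration of the suffix handling only.
    print(get_output_filename("dumps/enwiki-sample.xml.bz2"))                # dumps/enwiki-sample.tsv
    print(get_output_filename("dumps/enwiki-sample.xml.bz2", parquet=True))  # dumps/enwiki-sample.parquet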
| 
 | ||||
|     return input_file | ||||
| 
 | ||||
| def open_output_file(input_filename): | ||||
|     # create a regex that creates the output filename | ||||
|     output_filename = re.sub(r'\.(7z|gz|bz2)?$', '', input_filename) | ||||
|     output_filename = re.sub(r'\.xml', '', output_filename) | ||||
|     output_filename = output_filename + ".tsv" | ||||
|     output_filename = get_output_filename(input_filename, parquet=False) | ||||
|     output_file = open(output_filename, "w") | ||||
| 
 | ||||
|     return output_file | ||||
| 
 | ||||
| parser = argparse.ArgumentParser(description='Parse MediaWiki XML database dumps into tab delimited data.') | ||||
| 
 | ||||
| # arguments for the input direction | ||||
| parser.add_argument('dumpfiles', metavar="DUMPFILE", nargs="*", type=str,  | ||||
|                     help="Filename of the compressed or uncompressed XML database dump. If absent, we'll look for content on stdin and output on stdout.") | ||||
| def main(): | ||||
|     parser = argparse.ArgumentParser(description='Parse MediaWiki XML database dumps into tab delimited data.') | ||||
| 
 | ||||
| parser.add_argument('-o', '--output-dir', metavar='DIR', dest='output_dir', type=str, nargs=1, | ||||
|                     help="Directory for output files.") | ||||
|     # arguments for the input direction | ||||
|     parser.add_argument('dumpfiles', metavar="DUMPFILE", nargs="*", type=str, | ||||
|                         help="Filename of the compressed or uncompressed XML database dump. If absent, we'll look for content on stdin and output on stdout.") | ||||
| 
 | ||||
| parser.add_argument('-s', '--stdout', dest="stdout", action="store_true", | ||||
|                     help="Write output to standard out (do not create an output file)") | ||||
|     parser.add_argument('-o', '--output', metavar='OUTPUT', dest='output', type=str, nargs=1, | ||||
|                         help="Directory for output files. If it ends with .parquet, output will be written in Parquet format.") | ||||
| 
 | ||||
| parser.add_argument('--collapse-user', dest="collapse_user", action="store_true", | ||||
|                     help="Operate only on the final revision made by a user within each sequence of consecutive edits made by that user. This can be useful for addressing issues with text persistence measures.") | ||||
|     parser.add_argument('-s', '--stdout', dest="stdout", action="store_true", | ||||
|                         help="Write output to standard out (do not create an output file)") | ||||
| 
 | ||||
| parser.add_argument('-p', '--persistence', dest="persist", default=None, const='', type=str, choices = ['','segment','sequence','legacy'], nargs='?', | ||||
|                     help="Compute and report measures of content persistence: (1) persistent token revisions, (2) tokens added, and (3) the number of revisions used in computing the first measure. This may be slow. The default is -p=sequence, which uses the same algorithm as in the past, but with improvements to wikitext parsing. Use -p=legacy for the old behavior used in older research projects. Use -p=segment for an advanced persistence calculation method that is robust to content moves, but prone to bugs, and slower.") | ||||
|     parser.add_argument('--collapse-user', dest="collapse_user", action="store_true", | ||||
|                         help="Operate only on the final revision made by a user within each sequence of consecutive edits made by that user. This can be useful for addressing issues with text persistence measures.") | ||||
| 
 | ||||
| parser.add_argument('-u', '--url-encode', dest="urlencode", action="store_true", | ||||
|                     help="Output url encoded text strings. This works around some data issues like newlines in editor names. In the future it may be used to output other text data.") | ||||
|     parser.add_argument('-p', '--persistence', dest="persist", default=None, const='', type=str, | ||||
|                         choices=['', 'segment', 'sequence', 'legacy'], nargs='?', | ||||
|                         help="Compute and report measures of content persistence: (1) persistent token revisions, (2) tokens added, and (3) the number of revisions used in computing the first measure. This may be slow. The default is -p=sequence, which uses the same algorithm as in the past, but with improvements to wikitext parsing. Use -p=legacy for the old behavior used in older research projects. Use -p=segment for an advanced persistence calculation method that is robust to content moves, but prone to bugs, and slower.") | ||||
| 
 | ||||
| parser.add_argument('-n', '--namespace-include', dest="namespace_filter", type=int, action='append', | ||||
|                     help="Id number of namespace to include. Can be specified more than once.") | ||||
|     parser.add_argument('-n', '--namespace-include', dest="namespace_filter", type=int, action='append', | ||||
|                         help="Id number of namespace to include. Can be specified more than once.") | ||||
| 
 | ||||
| parser.add_argument('-rr', | ||||
|                     '--revert-radius', | ||||
|                     dest="revert_radius", | ||||
|                     type=int, | ||||
|                     action='store', | ||||
|                     default=15, | ||||
|                     help="Number of edits to check when looking for reverts (default: 15)") | ||||
|     parser.add_argument('-rr', | ||||
|                         '--revert-radius', | ||||
|                         dest="revert_radius", | ||||
|                         type=int, | ||||
|                         action='store', | ||||
|                         default=15, | ||||
|                         help="Number of edits to check when looking for reverts (default: 15)") | ||||
| 
 | ||||
| parser.add_argument('-RP', '--revision-pattern', dest="regex_match_revision", default=None, type=str, action='append', | ||||
|                     help="The regular expression to search for in revision text. The regex must be surrounded by quotes.") | ||||
|     parser.add_argument('-RP', '--revision-pattern', dest="regex_match_revision", default=None, type=str, | ||||
|                         action='append', | ||||
|                         help="The regular expression to search for in revision text. The regex must be surrounded by quotes.") | ||||
| 
 | ||||
| parser.add_argument('-RPl', '--revision-pattern-label', dest="regex_revision_label", default=None, type=str, action='append', | ||||
|                     help="The label for the outputted column based on matching the regex in revision text.") | ||||
|     parser.add_argument('-RPl', '--revision-pattern-label', dest="regex_revision_label", default=None, type=str, | ||||
|                         action='append', | ||||
|                         help="The label for the outputted column based on matching the regex in revision text.") | ||||
| 
 | ||||
| parser.add_argument('-CP', '--comment-pattern', dest="regex_match_comment", default=None, type=str, action='append', | ||||
|                     help="The regular expression to search for in comments of revisions.") | ||||
|     parser.add_argument('-CP', '--comment-pattern', dest="regex_match_comment", default=None, type=str, action='append', | ||||
|                         help="The regular expression to search for in comments of revisions.") | ||||
| 
 | ||||
| parser.add_argument('-CPl', '--comment-pattern-label', dest="regex_comment_label", default=None, type=str, action='append', | ||||
|                     help="The label for the outputted column based on matching the regex in comments.") | ||||
|     parser.add_argument('-CPl', '--comment-pattern-label', dest="regex_comment_label", default=None, type=str, | ||||
|                         action='append', | ||||
|                         help="The label for the outputted column based on matching the regex in comments.") | ||||
| 
 | ||||
| args = parser.parse_args() | ||||
|     parser.add_argument('--fandom-2020', dest="fandom_2020", | ||||
|                         action='store_true', | ||||
|                         help="Whether the archive is from the fandom 2020 dumps by Wikiteam. These dumps can have multiple .xml files in their archives.") | ||||
| 
 | ||||
| # set persistence method | ||||
|     args = parser.parse_args() | ||||
| 
 | ||||
| if args.persist is None: | ||||
|     persist = PersistMethod.none | ||||
| elif args.persist == "segment": | ||||
|     persist = PersistMethod.segment | ||||
| elif args.persist == "legacy": | ||||
|     persist = PersistMethod.legacy | ||||
| else: | ||||
|     persist = PersistMethod.sequence | ||||
|     # set persistence method | ||||
| 
 | ||||
| if args.namespace_filter is not None: | ||||
|     namespaces = args.namespace_filter | ||||
| else: | ||||
|     namespaces = None | ||||
|     if args.persist is None: | ||||
|         persist = PersistMethod.none | ||||
|     elif args.persist == "segment": | ||||
|         persist = PersistMethod.segment | ||||
|     elif args.persist == "legacy": | ||||
|         persist = PersistMethod.legacy | ||||
|     else: | ||||
|         persist = PersistMethod.sequence | ||||
| 
 | ||||
| if len(args.dumpfiles) > 0: | ||||
|     for filename in args.dumpfiles: | ||||
|         input_file = open_input_file(filename) | ||||
|     if args.namespace_filter is not None: | ||||
|         namespaces = args.namespace_filter | ||||
|     else: | ||||
|         namespaces = None | ||||
| 
 | ||||
|         # open directory for output | ||||
|         if args.output_dir: | ||||
|             output_dir = args.output_dir[0] | ||||
|         else: | ||||
|             output_dir = "." | ||||
|     if len(args.dumpfiles) > 0: | ||||
|         for filename in args.dumpfiles: | ||||
|             input_file = open_input_file(filename, args.fandom_2020) | ||||
| 
 | ||||
|         print("Processing file: %s" % filename, file=sys.stderr) | ||||
|             # open directory for output | ||||
|             if args.output: | ||||
|                 output = args.output[0] | ||||
|             else: | ||||
|                 output = "." | ||||
| 
 | ||||
|         if args.stdout: | ||||
|             output_file = sys.stdout | ||||
|         else: | ||||
|             filename = os.path.join(output_dir, os.path.basename(filename)) | ||||
|             output_file = open_output_file(filename) | ||||
|             output_parquet = output.endswith(".parquet") | ||||
| 
 | ||||
|         wikiq = WikiqParser(input_file, | ||||
|                             output_file, | ||||
|             print("Processing file: %s" % filename, file=sys.stderr) | ||||
| 
 | ||||
|             if args.stdout: | ||||
|                 # Parquet libraries need a binary output, so just sys.stdout doesn't work. | ||||
|                 output_file = sys.stdout.buffer | ||||
|             elif os.path.isdir(output) or output_parquet: | ||||
|                 filename = os.path.join(output, os.path.basename(filename)) | ||||
|                 output_file = get_output_filename(filename, parquet=output_parquet) | ||||
|             else: | ||||
|                 output_file = output | ||||
| 
 | ||||
|             wikiq = WikiqParser(input_file, | ||||
|                                 output_file, | ||||
|                                 collapse_user=args.collapse_user, | ||||
|                                 persist=persist, | ||||
|                                 namespaces=namespaces, | ||||
|                                 revert_radius=args.revert_radius, | ||||
|                                 regex_match_revision=args.regex_match_revision, | ||||
|                                 regex_revision_label=args.regex_revision_label, | ||||
|                                 regex_match_comment=args.regex_match_comment, | ||||
|                                 regex_comment_label=args.regex_comment_label, | ||||
|                                 output_parquet=output_parquet) | ||||
| 
 | ||||
|             wikiq.process() | ||||
| 
 | ||||
|             # close things | ||||
|             input_file.close() | ||||
| 
 | ||||
|     else: | ||||
|         wikiq = WikiqParser(sys.stdin, | ||||
|                             sys.stdout, | ||||
|                             collapse_user=args.collapse_user, | ||||
|                             persist=persist, | ||||
|                             urlencode=args.urlencode, | ||||
|                             # persist_legacy=args.persist_legacy, | ||||
|                             namespaces=namespaces, | ||||
|                             revert_radius=args.revert_radius, | ||||
|                             regex_match_revision = args.regex_match_revision, | ||||
|                             regex_revision_label = args.regex_revision_label, | ||||
|                             regex_match_comment = args.regex_match_comment, | ||||
|                             regex_comment_label = args.regex_comment_label) | ||||
|                             regex_match_revision=args.regex_match_revision, | ||||
|                             regex_revision_label=args.regex_revision_label, | ||||
|                             regex_match_comment=args.regex_match_comment, | ||||
|                             regex_comment_label=args.regex_comment_label) | ||||
| 
 | ||||
|         wikiq.process() | ||||
| 
 | ||||
|         # close things  | ||||
|         input_file.close() | ||||
|         output_file.close() | ||||
| else: | ||||
|     wikiq = WikiqParser(sys.stdin, | ||||
|                         sys.stdout, | ||||
|                         collapse_user=args.collapse_user, | ||||
|                         persist=persist, | ||||
|                         #persist_legacy=args.persist_legacy, | ||||
|                         urlencode=args.urlencode, | ||||
|                         namespaces=namespaces, | ||||
|                         revert_radius=args.revert_radius, | ||||
|                         regex_match_revision = args.regex_match_revision, | ||||
|                         regex_revision_label = args.regex_revision_label, | ||||
|                         regex_match_comment = args.regex_match_comment, | ||||
|                         regex_comment_label = args.regex_comment_label) | ||||
|     # stop_words = "a,able,about,across,after,all,almost,also,am,among,an,and,any,are,as,at,be,because,been,but,by,can,cannot,could,dear,did,do,does,either,else,ever,every,for,from,get,got,had,has,have,he,her,hers,him,his,how,however,i,if,in,into,is,it,its,just,least,let,like,likely,may,me,might,most,must,my,neither,no,nor,not,of,off,often,on,only,or,other,our,own,rather,said,say,says,she,should,since,so,some,than,that,the,their,them,then,there,these,they,this,tis,to,too,twas,us,wants,was,we,were,what,when,where,which,while,who,whom,why,will,with,would,yet,you,your" | ||||
|     # stop_words = stop_words.split(",") | ||||
| 
 | ||||
|     wikiq.process()  | ||||
| 
 | ||||
| # stop_words = "a,able,about,across,after,all,almost,also,am,among,an,and,any,are,as,at,be,because,been,but,by,can,cannot,could,dear,did,do,does,either,else,ever,every,for,from,get,got,had,has,have,he,her,hers,him,his,how,however,i,if,in,into,is,it,its,just,least,let,like,likely,may,me,might,most,must,my,neither,no,nor,not,of,off,often,on,only,or,other,our,own,rather,said,say,says,she,should,since,so,some,than,that,the,their,them,then,there,these,they,this,tis,to,too,twas,us,wants,was,we,were,what,when,where,which,while,who,whom,why,will,with,would,yet,you,your" | ||||
| # stop_words = stop_words.split(",") | ||||
| if __name__ == "__main__": | ||||
|     main() | ||||
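Assuming the script is invoked as wikiq (the exact entry-point name is not shown in this diff), a typical run of the new code path might look like `wikiq somewiki.xml.7z -o output.parquet --fandom-2020 -p sequence`: the 7z archive is streamed through 7za, sequence persistence is computed, and a Parquet file for the dump is written under the output.parquet directory; passing a plain directory to -o instead produces TSV output.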
|  | ||||