Initial commit
# new file: runwikiq.sh
This commit is contained in:
@@ -0,0 +1,4 @@
|
||||
# from . import errors
|
||||
from .db import DB
|
||||
from .collections import Pages, RecentChanges, Revisions, Archives, \
|
||||
AllRevisions, Users
|
||||
@@ -0,0 +1,4 @@
|
||||
from .pages import Pages
|
||||
from .recent_changes import RecentChanges
|
||||
from .revisions import Revisions, Archives, AllRevisions
|
||||
from .users import Users
|
||||
@@ -0,0 +1,11 @@
|
||||
class Collection:
    """
    Common base for the table-specific collections.

    A collection keeps a reference to the database object that owns it and
    renders itself as ``ClassName(<repr of db>)``.
    """

    # The accepted values for a ``direction`` argument.
    DIRECTIONS = {'newer', 'older'}

    def __init__(self, db):
        # The owning database object; stored as given.
        self.db = db

    def __repr__(self):
        class_name = self.__class__.__name__
        db_repr = repr(self.db)
        return "{0}({1})".format(class_name, db_repr)

    def __str__(self):
        # The printable form is deliberately the same as the repr.
        return self.__repr__()
|
||||
@@ -0,0 +1,65 @@
|
||||
import logging
|
||||
|
||||
from ...util import none_or
|
||||
from .collection import Collection
|
||||
|
||||
# Module-level logger for the pages collection.
logger = logging.getLogger("mw.database.collections.pages")
|
||||
|
||||
|
||||
class Pages(Collection):
    """
    Looks up rows of the ``page`` table through ``self.db.shared_connection``.
    """

    def get(self, page_id=None, namespace_title=None, rev_id=None):
        """
        Gets a single page based on a legitimate identifier of the page.  Note
        that namespace_title expects a tuple of namespace ID and title.

        Only one identifier is used to build the query.  If several are
        given, ``page_id`` takes precedence over ``namespace_title``, which
        takes precedence over ``rev_id``.

        :Parameters:
            page_id : int
                Page ID
            namespace_title : ( int, str )
                the page's namespace ID and title
            rev_id : int
                a revision ID included in the page's history

        :Returns:
            The first matching ``page`` row, or ``None`` when no row matches.

        :Raises:
            TypeError : if no identifier is given
        """
        # NOTE(review): none_or is assumed to pass None through and otherwise
        # apply the given conversion -- confirm against mw.util.
        page_id = none_or(page_id, int)
        namespace_title = none_or(namespace_title, tuple)
        rev_id = none_or(rev_id, int)

        query = """
            SELECT page.*
            FROM page
        """
        values = []

        # A single if/elif chain: exactly one WHERE clause is appended, and
        # the TypeError is raised only when no identifier was given at all.
        if page_id is not None:
            query += " WHERE page_id = %s "
            values.append(page_id)

        elif namespace_title is not None:
            namespace, title = namespace_title

            query += " WHERE page_namespace = %s and page_title = %s "
            values.extend([int(namespace), str(title)])

        elif rev_id is not None:
            query += """
                WHERE page_id = (SELECT rev_page FROM revision WHERE rev_id = %s)
            """
            values.append(rev_id)

        else:
            raise TypeError("Must specify a page identifier.")

        cursor = self.db.shared_connection.cursor()
        cursor.execute(query, values)

        # Return the first row only.
        for row in cursor:
            return row

        return None
|
||||
@@ -0,0 +1,128 @@
|
||||
import logging
|
||||
import time
|
||||
|
||||
from ...types import Timestamp
|
||||
from ...util import none_or
|
||||
from .collection import Collection
|
||||
|
||||
# Module-level logger for the recent changes collection.  The name previously
# read "...pages", which attributed this module's records to the pages module.
logger = logging.getLogger("mw.database.collections.recent_changes")
|
||||
|
||||
|
||||
class RecentChanges(Collection):
    """
    Queries and polls the ``recentchanges`` table through
    ``self.db.shared_connection``.
    """

    # (https://www.mediawiki.org/wiki/Manual:Recentchanges_table)
    TYPES = {
        'edit': 0,  # edit of existing page
        'new': 1,  # new page
        'move': 2,  # Marked as obsolete
        'log': 3,  # log action (introduced in MediaWiki 1.2)
        'move_over_redirect': 4,  # Marked as obsolete
        'external': 5  # An external recent change. Primarily used by Wikidata
    }

    def listen(self, last=None, types=None, max_wait=5):
        """
        Listens to the recent changes table.  Given no parameters, this function
        will return an iterator over the entire recentchanges table and then
        continue to "listen" for new changes to come in, re-querying at most
        once every `max_wait` seconds.

        :Parameters:
            last : dict
                a recentchanges row to pick up after
            types : set ( str )
                a set of recentchanges types to filter for
            max_wait : float
                the maximum number of seconds to wait between repeated queries

        :Returns:
            A never-ending iterator over change rows.
        """
        while True:
            if last is not None:
                after = last['rc_timestamp']
                after_id = last['rc_id']
            else:
                after = None
                after_id = None

            start = time.time()
            rcs = self.query(after=after, after_id=after_id, types=types,
                             direction="newer")

            for rc in rcs:
                yield rc
                # Remember how far we got so the next poll resumes after this
                # row instead of re-reading the same rows again.
                last = rc

            # NOTE(review): query() filters with a strict ``rc_timestamp > after``
            # in addition to ``rc_id > after_id``, so a change sharing the exact
            # timestamp of `last` would be skipped -- confirm this is acceptable.

            # The query may have taken longer than max_wait; never ask
            # time.sleep() for a negative duration (it raises ValueError).
            time.sleep(max(0, max_wait - (time.time() - start)))

    def query(self, before=None, after=None, before_id=None, after_id=None,
              types=None, direction=None, limit=None):
        """
        Queries the ``recentchanges`` table.  See
        `<https://www.mediawiki.org/wiki/Manual:Recentchanges_table>`_

        :Parameters:
            before : :class:`mw.Timestamp`
                The maximum timestamp (exclusive)
            after : :class:`mw.Timestamp`
                The minimum timestamp (exclusive)
            before_id : int
                The maximum ``rc_id`` (exclusive)
            after_id : int
                The minimum ``rc_id`` (exclusive)
            types : set ( str )
                Which types of changes to return?  An empty set matches no
                rows.

                * ``edit`` -- Edits to existing pages
                * ``new`` -- Edits that create new pages
                * ``move`` -- (obsolete)
                * ``log`` -- Log actions (introduced in MediaWiki 1.2)
                * ``move_over_redirect`` -- (obsolete)
                * ``external`` -- An external recent change. Primarily used by Wikidata

            direction : str
                "older" or "newer"
            limit : int
                limit the number of records returned

        :Returns:
            An iterator over ``recentchanges`` rows.

        :Raises:
            KeyError : if `types` contains a name that is not in ``TYPES``
        """
        # NOTE(review): none_or is assumed to pass None through and otherwise
        # convert/validate the value -- confirm against mw.util.
        before = none_or(before, Timestamp)
        after = none_or(after, Timestamp)
        before_id = none_or(before_id, int)
        after_id = none_or(after_id, int)
        direction = none_or(direction, levels=self.DIRECTIONS)
        limit = none_or(limit, int)

        # `types` is a collection of names, so it is validated here element
        # by element against TYPES.
        if types is not None:
            types = {types} if isinstance(types, str) else set(types)
            unknown = types - set(self.TYPES)
            if unknown:
                raise KeyError(sorted(unknown)[0])
            if not types:
                # ``IN ()`` is not valid SQL and no row can match an empty
                # set of types, so there is nothing to yield.
                return

        query = """
            SELECT * FROM recentchanges
            WHERE 1
        """
        values = []

        if before is not None:
            query += " AND rc_timestamp < %s "
            values.append(before.short_format())
        if after is not None:
            query += " AND rc_timestamp > %s "
            values.append(after.short_format())
        if before_id is not None:
            query += " AND rc_id < %s "
            values.append(before_id)
        if after_id is not None:
            query += " AND rc_id > %s "
            values.append(after_id)
        if types is not None:
            type_codes = sorted(self.TYPES[t] for t in types)
            query += " AND rc_type IN ({0}) ".format(
                ",".join(["%s"] * len(type_codes))
            )
            values.extend(type_codes)

        if direction is not None:
            order = ("ASC " if direction == "newer" else "DESC ")
            query += " ORDER BY rc_timestamp {0}, rc_id {0}".format(order)

        if limit is not None:
            query += " LIMIT %s "
            values.append(limit)

        cursor = self.db.shared_connection.cursor()
        cursor.execute(query, values)
        for row in cursor:
            yield row
|
||||
@@ -0,0 +1,410 @@
|
||||
import logging
|
||||
import time
|
||||
from itertools import chain
|
||||
|
||||
from ...types import Timestamp
|
||||
from ...util import iteration, none_or
|
||||
from .collection import Collection
|
||||
|
||||
# Module-level logger shared by the revision collections defined below.
logger = logging.getLogger("mw.database.collections.revisions")
|
||||
|
||||
|
||||
class AllRevisions(Collection):
    """
    Combines ``self.db.revisions`` and ``self.db.archives`` so that a revision
    can be found whether or not it has been archived.
    """

    def get(self, rev_id, include_page=False):
        """
        Gets a single revisions by ID.  Checks both the ``revision`` and
        ``archive`` tables.  This method throws a :class:`KeyError` if a
        revision cannot be found.

        :Parameters:
            rev_id : int
                Revision ID
            include_page : bool
                Join revision returned against ``page``.  Only forwarded to
                the ``revision`` lookup.

        :Returns:
            A revision row
        """
        rev_id = int(rev_id)
        try:
            return self.db.revisions.get(rev_id, include_page=include_page)
        except KeyError:
            # Not in ``revision``; fall back to ``archive``.  A KeyError from
            # this second lookup propagates to the caller.
            return self.db.archives.get(rev_id)

    def query(self, *args, **kwargs):
        """
        Queries revisions from both the ``revision`` and ``archive`` tables.

        All arguments are forwarded unchanged to both
        ``self.db.revisions.query`` and ``self.db.archives.query``.
        ``direction`` and ``limit`` are only recognised here when passed as
        keyword arguments.

        :Parameters:
            page_id : int
                Page identifier.  Filter revisions to this page.
            user_id : int
                User identifier.  Filter revisions to those made by this user.
            user_text : str
                User text (user_name or IP address).  Filter revisions to those
                made by this user.
            before : :class:`mw.Timestamp`
                Filter revisions to those made before this timestamp.
            after : :class:`mw.Timestamp`
                Filter revisions to those made after this timestamp.
            before_id : int
                Filter revisions to those with an ID before this ID
            after_id : int
                Filter revisions to those with an ID after this ID
            direction : str
                "newer" or "older"
            limit : int
                Limit the number of results
            include_page : bool
                Join revisions returned against ``page``

        :Returns:
            An iterator over revision rows.

        :Raises:
            TypeError : if `direction` is given but is not one of
                ``DIRECTIONS``
        """
        revisions = self.db.revisions.query(*args, **kwargs)
        archives = self.db.archives.query(*args, **kwargs)

        # An explicit ``direction=None`` means "no ordering", same as omitting it.
        direction = kwargs.get('direction')
        if direction is not None:
            if direction not in self.DIRECTIONS:
                raise TypeError("direction must be in {0}".format(self.DIRECTIONS))

            def sort_key(rev):
                return (rev['rev_timestamp'], rev['rev_id'])

            if direction == "newer":
                def compare(r1, r2):
                    return sort_key(r1) <= sort_key(r2)
            else:  # direction == "older"
                def compare(r1, r2):
                    return sort_key(r1) >= sort_key(r2)

            # NOTE(review): iteration.sequence is assumed to merge the two
            # already-ordered iterators using `compare` -- confirm in mw.util.
            collated_revisions = iteration.sequence(
                revisions,
                archives,
                compare=compare
            )
        else:
            collated_revisions = chain(revisions, archives)

        # An explicit ``limit=None`` means "no limit", same as omitting it.
        limit = kwargs.get('limit')
        if limit is None:
            for rev in collated_revisions:
                yield rev
            return

        # Each sub-query was limited individually, so the combined stream can
        # hold up to twice `limit` rows; stop after exactly `limit` of them.
        remaining = int(limit)
        if remaining <= 0:
            return
        for rev in collated_revisions:
            yield rev
            remaining -= 1
            if remaining <= 0:
                break
|
||||
|
||||
|
||||
class Revisions(Collection):
    """
    Queries the ``revision`` table through ``self.db.shared_connection``.
    Every row carries an extra ``archived`` column set to ``FALSE``.
    """

    def get(self, rev_id, include_page=False):
        """
        Gets a single revisions by ID.  Checks the ``revision`` table.   This
        method throws a :class:`KeyError` if a revision cannot be found.

        :Parameters:
            rev_id : int
                Revision ID
            include_page : bool
                Join revision returned against ``page``

        :Returns:
            A revision row
        """
        rev_id = int(rev_id)

        query = """
            SELECT *, FALSE AS archived FROM revision
        """
        if include_page:
            query += """
                INNER JOIN page ON page_id = rev_page
            """

        query += " WHERE rev_id = %s"

        # A cursor is obtained here; previously `cursor` was undefined in
        # this method.
        cursor = self.db.shared_connection.cursor()
        cursor.execute(query, [rev_id])

        for row in cursor:
            return row

        raise KeyError(rev_id)

    def query(self, page_id=None, user_id=None, user_text=None,
              before=None, after=None, before_id=None, after_id=None,
              direction=None, limit=None, include_page=False):
        """
        Queries revisions (excludes revisions to deleted pages)

        :Parameters:
            page_id : int
                Page identifier.  Filter revisions to this page.
            user_id : int
                User identifier.  Filter revisions to those made by this user.
            user_text : str
                User text (user_name or IP address).  Filter revisions to those
                made by this user.
            before : :class:`mw.Timestamp`
                Filter revisions to those made before this timestamp.
            after : :class:`mw.Timestamp`
                Filter revisions to those made after this timestamp.
            before_id : int
                Filter revisions to those with an ID before this ID
            after_id : int
                Filter revisions to those with an ID after this ID
            direction : str
                "newer" or "older"
            limit : int
                Limit the number of results
            include_page : bool
                Join revisions returned against ``page``

        :Returns:
            An iterator over revision rows.
        """
        start_time = time.time()

        # NOTE(review): none_or is assumed to pass None through and otherwise
        # convert/validate the value -- confirm against mw.util.
        page_id = none_or(page_id, int)
        user_id = none_or(user_id, int)
        user_text = none_or(user_text, str)
        before = none_or(before, Timestamp)
        after = none_or(after, Timestamp)
        before_id = none_or(before_id, int)
        after_id = none_or(after_id, int)
        direction = none_or(direction, levels=self.DIRECTIONS)
        limit = none_or(limit, int)
        include_page = bool(include_page)

        query = """
            SELECT *, FALSE AS archived FROM revision
        """

        if include_page:
            query += """
                INNER JOIN page ON page_id = rev_page
            """

        query += """
            WHERE 1
        """
        values = []

        if page_id is not None:
            query += " AND rev_page = %s "
            values.append(page_id)
        if user_id is not None:
            query += " AND rev_user = %s "
            values.append(user_id)
        if user_text is not None:
            query += " AND rev_user_text = %s "
            values.append(user_text)
        if before is not None:
            query += " AND rev_timestamp < %s "
            values.append(before.short_format())
        if after is not None:
            query += " AND rev_timestamp > %s "
            values.append(after.short_format())
        if before_id is not None:
            query += " AND rev_id < %s "
            values.append(before_id)
        if after_id is not None:
            query += " AND rev_id > %s "
            values.append(after_id)

        if direction is not None:
            order = ("ASC " if direction == "newer" else "DESC ")

            # Order primarily by the column that is being filtered on.
            if before_id is not None or after_id is not None:
                query += " ORDER BY rev_id {0}, rev_timestamp {0}".format(order)
            else:
                query += " ORDER BY rev_timestamp {0}, rev_id {0}".format(order)

        if limit is not None:
            query += " LIMIT %s "
            values.append(limit)

        cursor = self.db.shared_connection.cursor()
        cursor.execute(query, values)
        count = 0
        for row in cursor:
            yield row
            count += 1

        # Only reached when the caller consumes the iterator to the end.
        logger.debug("%s revisions read in %s seconds",
                     count, time.time() - start_time)
|
||||
|
||||
|
||||
class Archives(Collection):
    """
    Queries the ``archive`` table through ``self.db.shared_connection``.
    The ``ar_*`` columns are aliased to the ``rev_*`` / ``page_*`` names, and
    an ``archived`` column set to ``TRUE`` is added to every row.
    """

    # Shared SELECT ... FROM clause used by both get() and query().
    _SELECT_FROM_ARCHIVE = """
        SELECT
            ar_id,
            ar_rev_id AS rev_id,
            ar_page_id AS rev_page,
            ar_page_id AS page_id,
            ar_title AS page_title,
            ar_namespace AS page_namespace,
            ar_text_id AS rev_text_id,
            ar_comment AS rev_comment,
            ar_user AS rev_user,
            ar_user_text AS rev_user_text,
            ar_timestamp AS rev_timestamp,
            ar_minor_edit AS rev_minor_edit,
            ar_deleted AS rev_deleted,
            ar_len AS rev_len,
            ar_parent_id AS rev_parent_id,
            ar_sha1 AS rev_sha1,
            TRUE AS archived
        FROM archive
    """

    def get(self, rev_id):
        """
        Gets a single revisions by ID.  Checks the ``archive`` table.   This
        method throws a :class:`KeyError` if a revision cannot be found.

        :Parameters:
            rev_id : int
                Revision ID

        :Returns:
            A revision row
        """
        rev_id = int(rev_id)

        query = self._SELECT_FROM_ARCHIVE + """
            WHERE ar_rev_id = %s
        """

        # A cursor is obtained here; previously `cursor` was undefined in
        # this method.
        cursor = self.db.shared_connection.cursor()
        cursor.execute(query, [rev_id])
        for row in cursor:
            return row

        raise KeyError(rev_id)

    def query(self, page_id=None, user_id=None, user_text=None,
              before=None, after=None, before_id=None, after_id=None,
              before_ar_id=None, after_ar_id=None,
              direction=None, limit=None, include_page=True):
        """
        Queries archived revisions (revisions of deleted pages)

        :Parameters:
            page_id : int
                Page identifier.  Filter revisions to this page.
            user_id : int
                User identifier.  Filter revisions to those made by this user.
            user_text : str
                User text (user_name or IP address).  Filter revisions to those
                made by this user.
            before : :class:`mw.Timestamp`
                Filter revisions to those made before this timestamp.
            after : :class:`mw.Timestamp`
                Filter revisions to those made after this timestamp.
            before_id : int
                Filter revisions to those with an ID before this ID
            after_id : int
                Filter revisions to those with an ID after this ID
            before_ar_id : int
                Filter revisions to those with an ``ar_id`` before this ID
            after_ar_id : int
                Filter revisions to those with an ``ar_id`` after this ID
            direction : str
                "newer" or "older"
            limit : int
                Limit the number of results
            include_page : bool
                This field is ignored.  It's only here for compatibility with
                :class:`mw.database.Revision`.

        :Returns:
            An iterator over revision rows.
        """
        # NOTE(review): none_or is assumed to pass None through and otherwise
        # convert/validate the value -- confirm against mw.util.
        page_id = none_or(page_id, int)
        user_id = none_or(user_id, int)
        user_text = none_or(user_text, str)
        before = none_or(before, Timestamp)
        after = none_or(after, Timestamp)
        before_id = none_or(before_id, int)
        after_id = none_or(after_id, int)
        before_ar_id = none_or(before_ar_id, int)
        after_ar_id = none_or(after_ar_id, int)
        direction = none_or(direction, levels=self.DIRECTIONS)
        limit = none_or(limit, int)

        start_time = time.time()
        cursor = self.db.shared_connection.cursor()

        query = self._SELECT_FROM_ARCHIVE + """
            WHERE 1
        """
        values = []

        if page_id is not None:
            query += " AND ar_page_id = %s "
            values.append(page_id)
        if user_id is not None:
            query += " AND ar_user = %s "
            values.append(user_id)
        if user_text is not None:
            query += " AND ar_user_text = %s "
            values.append(user_text)
        if before is not None:
            query += " AND ar_timestamp < %s "
            values.append(before.short_format())
        if after is not None:
            query += " AND ar_timestamp > %s "
            values.append(after.short_format())
        if before_id is not None:
            query += " AND ar_rev_id < %s "
            values.append(before_id)
        if after_id is not None:
            query += " AND ar_rev_id > %s "
            values.append(after_id)
        # These two filters used ``?`` placeholders; every other parameter in
        # this statement uses ``%s``, so they now use ``%s`` as well.
        if before_ar_id is not None:
            query += " AND ar_id < %s "
            values.append(before_ar_id)
        if after_ar_id is not None:
            query += " AND ar_id > %s "
            values.append(after_ar_id)

        if direction is not None:
            order = ("ASC " if direction == "newer" else "DESC ")

            # Order primarily by the column that is being filtered on.
            if before is not None or after is not None:
                query += " ORDER BY ar_timestamp {0}, ar_rev_id {0}".format(order)
            elif before_id is not None or after_id is not None:
                query += " ORDER BY ar_rev_id {0}, ar_timestamp {0}".format(order)
            else:
                query += " ORDER BY ar_id {0}".format(order)

        if limit is not None:
            query += " LIMIT %s "
            values.append(limit)

        cursor.execute(query, values)
        count = 0
        for row in cursor:
            yield row
            count += 1

        # Only reached when the caller consumes the iterator to the end.
        logger.debug("%s revisions read in %s seconds",
                     count, time.time() - start_time)
|
||||
@@ -0,0 +1,154 @@
|
||||
import logging
|
||||
import time
|
||||
|
||||
from ...types import Timestamp
|
||||
from ...util import none_or
|
||||
from .collection import Collection
|
||||
|
||||
# Module-level logger for the users collection.
logger = logging.getLogger("mw.database.collections.users")
|
||||
|
||||
|
||||
class Users(Collection):
    """
    Queries the ``user`` table through ``self.db.shared_connection``.
    """

    CREATION_ACTIONS = {'newusers', 'create', 'create2', 'autocreate',
                        'byemail'}

    def get(self, user_id=None, user_name=None):
        """
        Gets a single user row from the database.  Raises a :class:`KeyError`
        if a user cannot be found.

        If both identifiers are given, ``user_id`` takes precedence.

        :Parameters:
            user_id : int
                User ID
            user_name : str
                User's name

        :Returns:
            A user row.

        :Raises:
            TypeError : if neither identifier is given
            KeyError : if no matching user exists
        """
        # NOTE(review): none_or is assumed to pass None through and otherwise
        # apply the given conversion -- confirm against mw.util.
        user_id = none_or(user_id, int)
        user_name = none_or(user_name, str)

        query = """
            SELECT user.*
            FROM user
        """
        values = []

        if user_id is not None:
            query += """
                WHERE user_id = %s
            """
            values.append(user_id)

        elif user_name is not None:
            query += """
                WHERE user_name = %s
            """
            values.append(user_name)

        else:
            raise TypeError("Must specify a user identifier.")

        cursor = self.db.shared_connection.cursor()
        cursor.execute(query, values)

        for row in cursor:
            return row

        raise KeyError(user_id if user_id is not None else user_name)

    def query(self, registered_before=None, registered_after=None,
              before_id=None, after_id=None, limit=None,
              direction=None, self_created_only=False):
        """
        Queries users based on various filtering parameters.

        :Parameters:
            registered_before : :class:`mw.Timestamp`
                A timestamp to search before (inclusive)
            registered_after : :class:`mw.Timestamp`
                A timestamp to search after (inclusive)
            before_id : int
                A user_id to search before (inclusive)
            after_id : int
                A user_id to search after (inclusive)
            direction : str
                "newer" or "older"
            limit : int
                Limit the results to at most this number
            self_created_only : bool
                limit results to self_created user accounts

        :Returns:
            an iterator over ``user`` table rows
        """
        start_time = time.time()

        registered_before = none_or(registered_before, Timestamp)
        registered_after = none_or(registered_after, Timestamp)
        # user_id is compared numerically, so the bounds are coerced to int.
        before_id = none_or(before_id, int)
        after_id = none_or(after_id, int)
        direction = none_or(direction, levels=self.DIRECTIONS)
        limit = none_or(limit, int)
        self_created_only = bool(self_created_only)

        query = """
            SELECT user.*
            FROM user
        """
        values = []

        if self_created_only:
            # Every condition of the ON clause must be joined with AND.
            query += """
                INNER JOIN logging ON
                    log_user = user_id AND
                    log_type = 'newusers' AND
                    log_action = 'create'
            """

        query += "WHERE 1 "

        if registered_before is not None:
            query += "AND user_registration <= %s "
            values.append(registered_before.short_format())
        if registered_after is not None:
            query += "AND user_registration >= %s "
            values.append(registered_after.short_format())
        if before_id is not None:
            query += "AND user_id <= %s "
            values.append(before_id)
        if after_id is not None:
            query += "AND user_id >= %s "
            values.append(after_id)

        query += "GROUP BY user_id "  # In case of duplicate log events

        if direction is not None:
            order = "ASC" if direction == "newer" else "DESC"
            # Order by registration time when filtering on it, else by ID.
            if registered_before is not None or registered_after is not None:
                query += "ORDER BY user_registration {0} ".format(order)
            else:
                query += "ORDER BY user_id {0} ".format(order)

        if limit is not None:
            query += "LIMIT %s "
            values.append(limit)

        cursor = self.db.shared_connection.cursor()
        cursor.execute(query, values)

        count = 0
        for row in cursor:
            yield row
            count += 1

        # Only reached when the caller consumes the iterator to the end.
        logger.debug("%s users queried in %s seconds",
                     count, time.time() - start_time)
|
||||
134
mediawiki_dump_tools/Mediawiki-Utilities/mw/database/db.py
Normal file
134
mediawiki_dump_tools/Mediawiki-Utilities/mw/database/db.py
Normal file
@@ -0,0 +1,134 @@
|
||||
import getpass
|
||||
import logging
|
||||
import os
|
||||
|
||||
import pymysql
|
||||
import pymysql.cursors
|
||||
|
||||
from .collections import AllRevisions, Archives, Pages, Revisions, Users
|
||||
|
||||
# Module-level logger for the database connection wrapper.
logger = logging.getLogger("mw.database.db")
|
||||
|
||||
|
||||
class DB:
    """
    Represents a connection to a MySQL database.

    :Parameters:
        connection : :class:`pymysql.connections.Connection`
            A connection to a MediaWiki database
    """

    def __init__(self, connection):
        self.shared_connection = connection
        # The collections index rows by column name, so cursors must return
        # dict rows.
        self.shared_connection.cursorclass = pymysql.cursors.DictCursor

        self.revisions = Revisions(self)
        """
        An instance of :class:`mw.database.Revisions`.
        """

        self.archives = Archives(self)
        """
        An instance of :class:`mw.database.Archives`.
        """

        self.all_revisions = AllRevisions(self)
        """
        An instance of :class:`mw.database.AllRevisions`.
        """

        self.pages = Pages(self)
        """
        An instance of :class:`mw.database.Pages`.
        """

        self.users = Users(self)
        """
        An instance of :class:`mw.database.Users`.
        """

    def __repr__(self):
        # The only state this object holds is the shared connection; the
        # previous implementation read ``self.args`` / ``self.kwargs``, which
        # are never assigned.
        return "{0}({1!r})".format(
            self.__class__.__name__,
            self.shared_connection
        )

    def __str__(self):
        return self.__repr__()

    @classmethod
    def add_arguments(cls, parser, defaults=None):
        """
        Adds the arguments to an :class:`argparse.ArgumentParser` in order to
        create a database connection.

        Note that ``-h`` is registered as the short form of ``--host``.  A
        parser that already defines ``-h`` (argparse adds it for ``--help``
        unless constructed with ``add_help=False``) must be configured so the
        option strings do not conflict.

        :Parameters:
            parser : :class:`argparse.ArgumentParser`
                The parser to add the arguments to
            defaults : dict
                Optional default values keyed by ``'host'``, ``'database'``,
                ``'defaults-file'`` and ``'user'``

        :Returns:
            The same `parser`, with the arguments added.
        """
        defaults = defaults if defaults is not None else {}

        default_host = defaults.get('host', "localhost")
        parser.add_argument(
            '--host', '-h',
            help="MySQL database host to connect to (defaults to {0})".format(default_host),
            default=default_host
        )

        # The current user name is only looked up when no default was given.
        if 'database' in defaults:
            default_database = defaults['database']
        else:
            default_database = getpass.getuser()
        parser.add_argument(
            '--database', '-d',
            help="MySQL database name to connect to (defaults to {0})".format(default_database),
            default=default_database
        )

        if 'defaults-file' in defaults:
            default_defaults_file = defaults['defaults-file']
        else:
            default_defaults_file = os.path.expanduser("~/.my.cnf")
        parser.add_argument(
            '--defaults-file',
            help="MySQL defaults file (defaults to {0})".format(default_defaults_file),
            default=default_defaults_file
        )

        if 'user' in defaults:
            default_user = defaults['user']
        else:
            default_user = getpass.getuser()
        parser.add_argument(
            '--user', '-u',
            help="MySQL user (defaults to {0})".format(default_user),
            default=default_user
        )
        return parser

    @classmethod
    def from_arguments(cls, args):
        """
        Constructs a :class:`~mw.database.DB`.
        Consumes :class:`argparse.ArgumentParser` arguments given by
        :meth:`add_arguments` in order to create a :class:`DB`.

        :Parameters:
            args : :class:`argparse.Namespace`
                A collection of argument values returned by :class:`argparse.ArgumentParser`'s :meth:`parse_args()`
        """
        # Keyword arguments keep the call independent of the positional
        # parameter order of pymysql.connect().
        connection = pymysql.connect(
            host=args.host,
            user=args.user,
            database=args.database,
            read_default_file=args.defaults_file
        )
        return cls(connection)

    @classmethod
    def from_params(cls, *args, **kwargs):
        """
        Constructs a :class:`~mw.database.DB`.  Passes `*args` and `**kwargs`
        to :func:`pymysql.connect` and configures the connection.

        :Parameters:
            args : tuple
                Positional arguments forwarded to :func:`pymysql.connect`
            kwargs : dict
                Keyword arguments forwarded to :func:`pymysql.connect`.
                ``cursorclass`` is always overridden with a dict cursor, and a
                non-empty ``db`` is passed on as ``database``.
        """
        kwargs['cursorclass'] = pymysql.cursors.DictCursor
        # ``db`` is optional; only translate it when it was actually supplied.
        if kwargs.get('db'):
            kwargs['database'] = kwargs.pop('db')
        connection = pymysql.connect(*args, **kwargs)
        return cls(connection)
|
||||
Reference in New Issue
Block a user