Initial commit
p# new file: runwikiq.sh
This commit is contained in:
@@ -0,0 +1,5 @@
|
||||
from . import errors
|
||||
from .session import Session
|
||||
|
||||
from .collections import Pages, RecentChanges, Revisions, SiteInfo, \
|
||||
UserContribs, DeletedRevisions
|
||||
@@ -0,0 +1,7 @@
|
||||
from .deleted_revisions import DeletedRevisions
|
||||
from .pages import Pages
|
||||
from .recent_changes import RecentChanges
|
||||
from .revisions import Revisions
|
||||
from .site_info import SiteInfo
|
||||
from .user_contribs import UserContribs
|
||||
from .users import Users
|
||||
@@ -0,0 +1,68 @@
|
||||
import re
|
||||
|
||||
|
||||
class Collection:
    """
    Represents a collection of items that can be queried via the API.  This is
    an abstract base class that should be extended.
    """

    TIMESTAMP = re.compile(r"[0-9]{4}-?[0-9]{2}-?[0-9]{2}T?" +
                           r"[0-9]{2}:?[0-9]{2}:?[0-9]{2}Z?")
    """
    A regular expression for matching the API's timestamp format.
    """

    DIRECTIONS = {'newer', 'older'}
    """
    A set of potential direction names.
    """

    def __init__(self, session):
        """
        :Parameters:
            session : `mw.api.Session`
                An api session to use for post & get.
        """
        self.session = session

    def _check_direction(self, direction):
        """
        Normalizes and validates a sort direction.

        :Parameters:
            direction : str | None
                A direction name; converted with `str()` before checking

        :Returns:
            `None` when `direction` is `None`, otherwise the direction as a
            `str`.

        :Raises:
            AssertionError : if the value is not in :attr:`DIRECTIONS`
        """
        if direction is None:
            return None

        direction = str(direction)

        # Raised explicitly (rather than via `assert`) so the check is not
        # stripped when Python runs with -O.  The exception type is unchanged.
        if direction not in self.DIRECTIONS:
            raise AssertionError(
                "Direction must be one of {0}".format(self.DIRECTIONS))

        return direction

    def _check_timestamp(self, timestamp):
        """
        Normalizes and validates a timestamp.

        :Parameters:
            timestamp : object | None
                A value whose `str()` form starts with something matching
                :attr:`TIMESTAMP`

        :Returns:
            `None` when `timestamp` is `None`, otherwise `str(timestamp)`.

        :Raises:
            TypeError : if the string form does not match :attr:`TIMESTAMP`
        """
        if timestamp is None:
            return None

        timestamp = str(timestamp)

        # `match` only anchors at the start of the string; trailing text is
        # tolerated, exactly as before.
        if not self.TIMESTAMP.match(timestamp):
            raise TypeError(
                "{0} is not formatted like ".format(repr(timestamp)) +
                "a MediaWiki timestamp."
            )

        return timestamp

    def _items(self, items, none=True, levels=None, type=lambda val: val):
        """
        Builds a pipe-separated, de-duplicated parameter value.

        :Parameters:
            items : iterable | str | None
                The values to join.  A bare `str` is treated as one item
                rather than being split into characters.
            none : bool
                When true, `items=None` is passed through as `None`
            levels : iterable | None
                When given, every item must be one of these values
            type : callable
                Applied to each item before it is converted with `str()`

        :Returns:
            `None` (see `none`) or the items joined with "|" in sorted order,
            so that the same input always produces the same request.

        :Raises:
            AssertionError : if an item is not in `levels`
        """
        if none and items is None:
            return None

        # A lone string is a single value, not a sequence of characters.
        if isinstance(items, str):
            items = [items]

        items = {str(type(item)) for item in items}

        if levels is not None:
            levels = {str(level) for level in levels}
            unknown = items - levels

            # Explicit raise so the check survives `python -O`.
            if unknown:
                raise AssertionError(
                    "items {0} not in levels {1}".format(unknown, levels))

        return "|".join(sorted(items))
|
||||
@@ -0,0 +1,150 @@
|
||||
import logging
|
||||
import sys
|
||||
|
||||
from ...types import Timestamp
|
||||
from ...util import none_or
|
||||
from ..errors import MalformedResponse
|
||||
from .collection import Collection
|
||||
|
||||
logger = logging.getLogger("mw.api.collections.deletedrevs")
|
||||
|
||||
|
||||
class DeletedRevisions(Collection):
    """
    A collection of deleted revisions, queried through
    ``prop=deletedrevisions``.
    """

    PROPERTIES = {'ids', 'flags', 'timestamp', 'user', 'userid', 'size',
                  'sha1', 'contentmodel', 'comment', 'parsedcomment', 'content',
                  'tags'}

    # TODO:
    # This is *not* the right way to do this, but it should work for all queries.
    MAX_REVISIONS = 500

    def get(self, rev_id, *args, **kwargs):
        """
        Gets a single deleted revision by its ID.

        :Parameters:
            rev_id : int
                Revision ID
            ``*args``
                Accepted for compatibility; not used
            ``**kwargs``
                Passed to :py:meth:`query`

        :Returns:
            A single rev dict

        :Raises:
            KeyError : if no revision is returned for `rev_id`
        """
        rev_id = int(rev_id)

        revs = list(self.query(revids={rev_id}, **kwargs))

        if len(revs) < 1:
            raise KeyError(rev_id)

        return revs[0]

    def query(self, *args, limit=sys.maxsize, **kwargs):
        """
        Queries deleted revisions.
        See https://www.mediawiki.org/wiki/API:Deletedrevs

        :Parameters:
            titles : set(str)
                A set of page names to query (note that namespace prefix is expected)
            pageids : set(int)
                A set of page IDs to query
            revids : set(int)
                A set of revision IDs to query
            start : :class:`mw.Timestamp`
                A timestamp to start querying from
            end : :class:`mw.Timestamp`
                A timestamp to end querying
            unique : bool
                Accepted, but currently not sent to the API
            tag : str
                Only list revision tagged with this tag
            user : str
                Only list revisions saved by this user_text
            excludeuser : str
                Do not list revision saved by this user_text
            namespace : int
                Accepted, but currently not sent to the API
            limit : int
                Limit the total number of results; `None` means no limit
            direction : str
                "newer" or "older"
            properties : set(str)
                A list of properties to include in the results:

                * ids - The ID of the revision.
                * flags - Revision flags (minor).
                * timestamp - The timestamp of the revision.
                * user - User that made the revision.
                * userid - User ID of the revision creator.
                * size - Length (bytes) of the revision.
                * sha1 - SHA-1 (base 16) of the revision.
                * contentmodel - Content model ID of the revision.
                * comment - Comment by the user for the revision.
                * parsedcomment - Parsed comment by the user for the revision.
                * content - Text of the revision.
                * tags - Tags for the revision.

        Note: `from_title`, `to_title`, `prefix` and `drcontinue` are not
        accepted by this method; continuation is handled internally.

        :Returns:
            An iterator of rev dicts returned from the API.
        """
        if limit is None:
            limit = sys.maxsize

        revisions_yielded = 0
        done = False

        # Strictly less-than: once `limit` documents have been yielded no
        # further request may be issued.
        while not done and revisions_yielded < limit:
            # `limit` is the overall cap; each request asks only for what is
            # still missing, bounded by the per-request maximum.
            kwargs['limit'] = min(limit - revisions_yielded,
                                  self.MAX_REVISIONS)

            rev_docs, query_continue = self._query(*args, **kwargs)

            for doc in rev_docs:
                yield doc
                revisions_yielded += 1

                if revisions_yielded >= limit:
                    break

            if query_continue != "" and len(rev_docs) > 0:
                kwargs['query_continue'] = query_continue
            else:
                done = True

    def _query(self, titles=None, pageids=None, revids=None,
               start=None, end=None, query_continue=None, unique=None,
               tag=None, user=None, excludeuser=None, namespace=None,
               limit=None, properties=None, direction=None):
        """
        Performs one API request.

        :Returns:
            A pair `(rev_docs, query_continue)`: the list of revision dicts
            (each with its page dict attached under 'page') and the
            response's 'continue' value, or '' when there is none.

        :Raises:
            MalformedResponse : if an expected key is missing from the response
        """
        params = {
            'action': "query",
            'prop': "deletedrevisions"
        }

        params['titles'] = self._items(titles)
        params['pageids'] = self._items(pageids)
        params['revids'] = self._items(revids)
        params['drvprop'] = self._items(properties, levels=self.PROPERTIES)
        params['drvlimit'] = none_or(limit, int)
        params['drvstart'] = self._check_timestamp(start)
        params['drvend'] = self._check_timestamp(end)

        params['drvdir'] = self._check_direction(direction)
        params['drvuser'] = none_or(user, str)
        # A user name, not a number (matches `drvuser` above).
        params['drvexcludeuser'] = none_or(excludeuser, str)
        params['drvtag'] = none_or(tag, str)
        # `unique` and `namespace` are accepted but not sent.
        params.update(query_continue or {'continue': ""})

        doc = self.session.get(params)

        try:
            query_continue = doc['continue'] if 'continue' in doc else ''

            pages = doc['query']['pages'].values()
            rev_docs = []

            for page_doc in pages:
                # Detach the revisions from the page so that each revision
                # can carry a compact reference to its page.
                page_rev_docs = page_doc.pop('deletedrevisions', [])

                for rev_doc in page_rev_docs:
                    rev_doc['page'] = page_doc

                rev_docs.extend(page_rev_docs)

            return rev_docs, query_continue

        except KeyError as e:
            raise MalformedResponse(str(e), doc)
|
||||
@@ -0,0 +1,50 @@
|
||||
import logging
|
||||
|
||||
from ...util import none_or
|
||||
from .collection import Collection
|
||||
|
||||
logger = logging.getLogger("mw.api.collections.pages")
|
||||
|
||||
|
||||
class Pages(Collection):
    """
    TODO
    """

    def _edit(self, title=None, pageid=None, section=None, sectiontitle=None,
              text=None, token=None, summary=None, minor=None,
              notminor=None, bot=None, basetimestamp=None,
              starttimestamp=None, recreate=None, createonly=None,
              nocreate=None, watch=None, unwatch=None, watchlist=None,
              md5=None, prependtext=None, appendtext=None, undo=None,
              undoafter=None, redirect=None, contentformat=None,
              contentmodel=None, assert_=None, nassert=None,
              captchaword=None, captchaid=None):
        """
        Unfinished: coerces the edit arguments into a parameter dict but does
        not send anything yet, and returns `None`.
        """
        # Built as a single literal; entries are evaluated top to bottom, so
        # a bad argument fails at the same point as with one-by-one assignment.
        params = {
            'action': "edit",
            'title': none_or(title, str),
            'pageid': none_or(pageid, int),
            'section': none_or(section, int, levels={'new'}),
            'sectiontitle': none_or(sectiontitle, str),
            'text': none_or(text, str),
            'token': none_or(token, str),
            'summary': none_or(summary, str),
            'minor': none_or(minor, bool),
            'notminor': none_or(notminor, bool),
            'bot': none_or(bot, bool),
            'basetimestamp': self._check_timestamp(basetimestamp),
            'starttimestamp': self._check_timestamp(starttimestamp),
            'recreate': none_or(recreate, bool),
            'createonly': none_or(createonly, bool),
            'nocreate': none_or(nocreate, bool),
            'watch': none_or(watch, bool),
            'unwatch': none_or(unwatch, bool),
            'watchlist': none_or(watchlist, bool),
            'md5': none_or(md5, str),
            'prependtext': none_or(prependtext, str),
            'appendtext': none_or(appendtext, str),
            'undo': none_or(undo, int),
            'undoafter': none_or(undoafter, int),
        }

        # TODO finish this
|
||||
@@ -0,0 +1,192 @@
|
||||
import logging
|
||||
import re
|
||||
|
||||
from ...util import none_or
|
||||
from ..errors import MalformedResponse
|
||||
from .collection import Collection
|
||||
|
||||
logger = logging.getLogger("mw.api.collections.recent_changes")
|
||||
|
||||
|
||||
class RecentChanges(Collection):
    """
    Recent changes (revisions, page creations, registrations, moves, etc.)
    """

    RCCONTINUE = re.compile(r"([0-9]{4}-[0-9]{2}-[0-9]{2}T" +
                            r"[0-9]{2}:[0-9]{2}:[0-9]{2}Z|" +
                            r"[0-9]{14})" +
                            r"\|[0-9]+")

    # Includes every property documented in `query` below.
    PROPERTIES = {'user', 'userid', 'comment', 'parsedcomment', 'timestamp',
                  'title', 'ids', 'sizes', 'redirect', 'patrolled', 'flags',
                  'loginfo', 'tags', 'sha1'}

    # Includes every filter documented in `query` below.
    SHOW = {'minor', '!minor', 'bot', '!bot', 'anon', '!anon',
            'redirect', '!redirect', 'patrolled', '!patrolled',
            'unpatrolled'}

    TYPES = {'edit', 'external', 'new', 'log'}

    DIRECTIONS = {'newer', 'older'}

    MAX_CHANGES = 50

    def _check_rccontinue(self, rccontinue):
        """
        Validates a continuation value.

        :Returns:
            `None` when `rccontinue` is `None`, otherwise the value unchanged.

        :Raises:
            TypeError : if the value does not start with something matching
                        :attr:`RCCONTINUE`
        """
        if rccontinue is None:
            return None
        elif self.RCCONTINUE.match(rccontinue):
            return rccontinue
        else:
            raise TypeError(
                "rccontinue {0} is not formatted correctly ".format(rccontinue) +
                "'%Y-%m-%dT%H:%M:%SZ|<last_rcid>'"
            )

    def query(self, *args, limit=None, **kwargs):
        """
        Enumerate recent changes.
        See `<https://www.mediawiki.org/wiki/API:Recentchanges>`_

        :Parameters:
            start : :class:`mw.Timestamp`
                The timestamp to start enumerating from
            end : :class:`mw.Timestamp`
                The timestamp to end enumerating
            direction :
                "newer" or "older"
            namespace : int
                Filter log entries to only this namespace(s)
            user : str
                Only list changes by this user
            excludeuser : str
                Don't list changes by this user
            tag : str
                Only list changes tagged with this tag
            properties : set(str)
                Include additional pieces of information

                * user - Adds the user responsible for the edit and tags if they are an IP
                * userid - Adds the user id responsible for the edit
                * comment - Adds the comment for the edit
                * parsedcomment - Adds the parsed comment for the edit
                * flags - Adds flags for the edit
                * timestamp - Adds timestamp of the edit
                * title - Adds the page title of the edit
                * ids - Adds the page ID, recent changes ID and the new and old revision ID
                * sizes - Adds the new and old page length in bytes
                * redirect - Tags edit if page is a redirect
                * patrolled - Tags patrollable edits as being patrolled or unpatrolled
                * loginfo - Adds log information (logid, logtype, etc) to log entries
                * tags - Lists tags for the entry
                * sha1 - Adds the content checksum for entries associated with a revision

            token : str
                Which token to obtain for each change

                * patrol

            show : set(str)
                Show only items that meet this criteria. For example, to see
                only minor edits done by logged-in users, set
                show={'minor', '!anon'}.

                * minor
                * !minor
                * bot
                * !bot
                * anon
                * !anon
                * redirect
                * !redirect
                * patrolled
                * !patrolled
                * unpatrolled
            limit : int
                How many total changes to return
            type : set(str)
                Which types of changes to show

                * edit
                * external
                * new
                * log

            toponly : bool
                Only list changes which are the latest revision
            rccontinue : str
                Use this to continue loading results from where you last left off

        :Returns:
            An iterator of change dicts returned from the API.
        """
        limit = none_or(limit, int)

        changes_yielded = 0
        done = False
        while not done:

            # Ask only for what is still missing, bounded by the per-request
            # maximum.
            if limit is None:
                kwargs['limit'] = self.MAX_CHANGES
            else:
                kwargs['limit'] = min(limit - changes_yielded, self.MAX_CHANGES)

            rc_docs, rccontinue = self._query(*args, **kwargs)

            for doc in rc_docs:
                yield doc
                changes_yielded += 1

                if limit is not None and changes_yielded >= limit:
                    done = True
                    break

            if rccontinue is not None and len(rc_docs) > 0:
                kwargs['rccontinue'] = rccontinue
            else:
                done = True

    def _query(self, start=None, end=None, direction=None, namespace=None,
               user=None, excludeuser=None, tag=None, properties=None,
               token=None, show=None, limit=None, type=None,
               toponly=None, rccontinue=None):
        """
        Performs one API request.

        :Returns:
            A pair `(rc_docs, rccontinue)`: the list of change dicts and the
            value to pass as `rccontinue` for the next request.

        :Raises:
            MalformedResponse : if an expected key is missing from the response
        """
        params = {
            'action': "query",
            'list': "recentchanges",
            # Request the legacy 'query-continue' block that is read below
            # (same as Revisions._query).
            'rawcontinue': ''
        }

        params['rcstart'] = none_or(start, str)
        params['rcend'] = none_or(end, str)

        params['rcdir'] = self._check_direction(direction)
        params['rcnamespace'] = none_or(namespace, int)
        params['rcuser'] = none_or(user, str)
        params['rcexcludeuser'] = none_or(excludeuser, str)
        params['rctag'] = none_or(tag, str)
        params['rcprop'] = self._items(properties, levels=self.PROPERTIES)
        # Built from `token` (previously built from `tag` by mistake).
        params['rctoken'] = none_or(token, str)
        params['rcshow'] = self._items(show, levels=self.SHOW)
        params['rclimit'] = none_or(limit, int)
        # `levels` must be passed by keyword; positionally it would land in
        # the `none` parameter and the types would not be validated.
        params['rctype'] = self._items(type, levels=self.TYPES)
        params['rctoponly'] = none_or(toponly, bool)
        params['rccontinue'] = self._check_rccontinue(rccontinue)

        doc = self.session.get(params)

        try:
            rc_docs = doc['query']['recentchanges']

            if 'query-continue' in doc:
                rccontinue = \
                    doc['query-continue']['recentchanges']['rccontinue']
            elif len(rc_docs) > 0:
                # No continuation supplied: derive one from the last change.
                # This needs 'timestamp' and 'rcid' in each change document.
                rccontinue = "|".join([rc_docs[-1]['timestamp'],
                                       str(rc_docs[-1]['rcid'] + 1)])
            else:
                pass  # Leave it be

        except KeyError as e:
            raise MalformedResponse(str(e), doc)

        return rc_docs, rccontinue
|
||||
@@ -0,0 +1,220 @@
|
||||
import logging
|
||||
|
||||
from ...util import none_or
|
||||
from ..errors import MalformedResponse
|
||||
from .collection import Collection
|
||||
|
||||
logger = logging.getLogger("mw.api.collections.revisions")
|
||||
|
||||
|
||||
class Revisions(Collection):
    """
    A collection of revisions indexes by title, page_id and user_text.
    Note that revisions of deleted pages are queriable via
    :class:`mw.api.DeletedRevisions`.
    """

    PROPERTIES = {'ids', 'flags', 'timestamp', 'user', 'userid', 'size',
                  'sha1', 'contentmodel', 'comment', 'parsedcomment',
                  'content', 'tags', 'flagged'}

    DIFF_TO = {'prev', 'next', 'cur'}

    # This is *not* the right way to do this, but it should work for all queries.
    MAX_REVISIONS = 50

    def get(self, rev_id, **kwargs):
        """
        Get a single revision based on its ID.  Throws a :py:class:`KeyError`
        if the rev_id cannot be found.

        :Parameters:
            rev_id : int
                Revision ID
            ``**kwargs``
                Passed to :py:meth:`query`

        :Returns:
            A single rev dict
        """
        rev_id = int(rev_id)

        revs = list(self.query(revids={rev_id}, **kwargs))

        if len(revs) < 1:
            raise KeyError(rev_id)

        return revs[0]

    def query(self, *args, limit=None, **kwargs):
        """
        Get revision information.
        See `<https://www.mediawiki.org/wiki/API:Properties#revisions_.2F_rv>`_

        :Parameters:
            revids : set(int)
                Revision IDs to look up (no limit is sent when this is given)
            titles : set(str)
                Page titles to get revisions for
            pageids : set(int)
                Page IDs to get revisions for
            properties : set(str)
                Which properties to get for each revision:

                * ids - The ID of the revision
                * flags - Revision flags (minor)
                * timestamp - The timestamp of the revision
                * user - User that made the revision
                * userid - User id of revision creator
                * size - Length (bytes) of the revision
                * sha1 - SHA-1 (base 16) of the revision
                * contentmodel - Content model id
                * comment - Comment by the user for revision
                * parsedcomment - Parsed comment by the user for the revision
                * content - Text of the revision
                * tags - Tags for the revision
            limit : int
                Limit how many revisions will be returned
                No more than 500 (5000 for bots) allowed
            start_id : int
                From which revision id to start enumeration (enum)
            end_id : int
                Stop revision enumeration on this revid
            start : :class:`mw.Timestamp`
                From which revision timestamp to start enumeration (enum)
            end : :class:`mw.Timestamp`
                Enumerate up to this timestamp
            direction : str
                "newer" or "older"
            user : str
                Only include revisions made by user_text
            excludeuser : str
                Exclude revisions made by user_text
            tag : str
                Only list revisions tagged with this tag
            expandtemplates : bool
                Expand templates in revision content (requires "content" property)
            generatexml : bool
                Generate XML parse tree for revision content (requires "content" property)
            parse : bool
                Parse revision content (requires "content" property)
            section : int
                Only retrieve the content of this section number
            token : str
                Which token to obtain for each revision

                * rollback - See `<https://www.mediawiki.org/wiki/API:Edit_-_Rollback#Token>`_
            rvcontinue : str
                When more results are available, use this to continue
            diffto : int
                Revision ID to diff each revision to.  Use "prev", "next" and
                "cur" for the previous, next and current revision respectively
            difftotext : str
                Text to diff each revision to. Only diffs a limited number of
                revisions. Overrides diffto. If section is set, only that
                section will be diffed against this text
            contentformat : str
                Serialization format used for difftotext and expected for output of content

                * text/x-wiki
                * text/javascript
                * text/css
                * text/plain
                * application/json

        :Returns:
            An iterator of rev dicts returned from the API.
        """

        revisions_yielded = 0
        done = False
        while not done:
            # Ask only for what is still missing, bounded by the per-request
            # maximum.
            if limit is None:
                kwargs['limit'] = self.MAX_REVISIONS
            else:
                kwargs['limit'] = min(limit - revisions_yielded,
                                      self.MAX_REVISIONS)

            rev_docs, rvcontinue = self._query(*args, **kwargs)

            for doc in rev_docs:
                yield doc
                revisions_yielded += 1

                if limit is not None and revisions_yielded >= limit:
                    done = True
                    break

            if rvcontinue is not None and len(rev_docs) > 0:
                kwargs['rvcontinue'] = rvcontinue
            else:
                done = True

    def _query(self, revids=None, titles=None, pageids=None, properties=None,
               limit=None, start_id=None, end_id=None, start=None,
               end=None, direction=None, user=None, excludeuser=None,
               tag=None, expandtemplates=None, generatexml=None,
               parse=None, section=None, token=None, rvcontinue=None,
               diffto=None, difftotext=None, contentformat=None):
        """
        Performs one API request.

        :Returns:
            A pair `(rev_docs, rvcontinue)`: the list of revision dicts (each
            with its page dict attached under 'page') and the continuation
            value, or `None` when the response has no 'query-continue'.

        :Raises:
            MalformedResponse : if an expected key is missing from the response
        """
        params = {
            'action': "query",
            'prop': "revisions",
            'rawcontinue': ''
        }

        params['revids'] = self._items(revids, type=int)
        params['titles'] = self._items(titles)
        params['pageids'] = self._items(pageids, type=int)

        params['rvprop'] = self._items(properties, levels=self.PROPERTIES)

        if revids is None:  # Can't have a limit unless revids is none
            params['rvlimit'] = none_or(limit, int)

        params['rvstartid'] = none_or(start_id, int)
        params['rvendid'] = none_or(end_id, int)
        params['rvstart'] = self._check_timestamp(start)
        params['rvend'] = self._check_timestamp(end)

        params['rvdir'] = self._check_direction(direction)
        params['rvuser'] = none_or(user, str)
        # A user name, not a number (matches `rvuser` above).
        params['rvexcludeuser'] = none_or(excludeuser, str)
        params['rvtag'] = none_or(tag, str)
        params['rvexpandtemplates'] = none_or(expandtemplates, bool)
        params['rvgeneratexml'] = none_or(generatexml, bool)
        params['rvparse'] = none_or(parse, bool)
        params['rvsection'] = none_or(section, int)
        params['rvtoken'] = none_or(token, str)
        params['rvcontinue'] = none_or(rvcontinue, str)
        params['rvdiffto'] = self._check_diffto(diffto)
        params['rvdifftotext'] = none_or(difftotext, str)
        params['rvcontentformat'] = none_or(contentformat, str)

        doc = self.session.get(params)

        try:
            if 'query-continue' in doc:
                rvcontinue = doc['query-continue']['revisions']['rvcontinue']
            else:
                rvcontinue = None

            pages = doc['query'].get('pages', {}).values()
            rev_docs = []

            for page_doc in pages:
                if 'missing' in page_doc or 'revisions' not in page_doc:
                    continue

                # Detach the revisions from the page so that each revision
                # can carry a compact reference to its page.
                page_rev_docs = page_doc.pop('revisions')

                for rev_doc in page_rev_docs:
                    rev_doc['page'] = page_doc

                rev_docs.extend(page_rev_docs)

            return rev_docs, rvcontinue

        except KeyError as e:
            raise MalformedResponse(str(e), doc)

    def _check_diffto(self, diffto):
        """
        Returns `diffto` unchanged when it is `None` or one of
        :attr:`DIFF_TO`; otherwise converts it to an `int` revision ID.
        """
        if diffto is None or diffto in self.DIFF_TO:
            return diffto

        return int(diffto)
|
||||
@@ -0,0 +1,81 @@
|
||||
import logging
|
||||
|
||||
from ..errors import MalformedResponse
|
||||
from .collection import Collection
|
||||
|
||||
logger = logging.getLogger("mw.api.collections.site_info")
|
||||
|
||||
|
||||
class SiteInfo(Collection):
    """
    General information about the site.
    """

    # Includes every property documented in `query` below.
    PROPERTIES = {'general', 'namespaces', 'namespacealiases',
                  'specialpagealiases', 'magicwords', 'interwikimap',
                  'dbrepllag', 'statistics', 'usergroups', 'extensions',
                  'fileextensions', 'rightsinfo', 'restrictions',
                  'languages', 'skins', 'extensiontags', 'functionhooks',
                  'showhooks', 'variables', 'protocols', 'defaultoptions'}

    FILTERIW = {'local', '!local'}

    def query(self, properties=None, filteriw=None, showalldb=None,
              numberinggroup=None, inlanguagecode=None):
        """
        General information about the site.
        See `<https://www.mediawiki.org/wiki/API:Meta#siteinfo_.2F_si>`_

        :Parameters:
            properties: set(str)
                Which sysinfo properties to get:

                * general - Overall system information
                * namespaces - List of registered namespaces and their canonical names
                * namespacealiases - List of registered namespace aliases
                * specialpagealiases - List of special page aliases
                * magicwords - List of magic words and their aliases
                * statistics - Returns site statistics
                * interwikimap - Returns interwiki map (optionally filtered, (optionally localised by using siinlanguagecode))
                * dbrepllag - Returns database server with the highest replication lag
                * usergroups - Returns user groups and the associated permissions
                * extensions - Returns extensions installed on the wiki
                * fileextensions - Returns list of file extensions allowed to be uploaded
                * rightsinfo - Returns wiki rights (license) information if available
                * restrictions - Returns information on available restriction (protection) types
                * languages - Returns a list of languages MediaWiki supports(optionally localised by using siinlanguagecode)
                * skins - Returns a list of all enabled skins
                * extensiontags - Returns a list of parser extension tags
                * functionhooks - Returns a list of parser function hooks
                * showhooks - Returns a list of all subscribed hooks (contents of $wgHooks)
                * variables - Returns a list of variable IDs
                * protocols - Returns a list of protocols that are allowed in external links.
                * defaultoptions - Returns the default values for user preferences.
            filteriw : str
                "local" or "!local" Return only local or only nonlocal entries of the interwiki map
            showalldb : bool
                List all database servers, not just the one lagging the most
            numberinggroup : bool
                Lists the number of users in user groups (sent to the API as
                ``sinumberingroup``)
            inlanguagecode : str
                Language code for localised language names (best effort, use CLDR extension)

        :Returns:
            The 'query' part of the API response.

        :Raises:
            MalformedResponse : if the response has no 'query' key
        """

        siprop = self._items(properties, levels=self.PROPERTIES)

        doc = self.session.get(
            {
                'action': "query",
                'meta': "siteinfo",
                'siprop': siprop,
                'sifilteriw': filteriw,
                'sishowalldb': showalldb,
                # The API parameter is spelled "numberingroup" (number in
                # group); the Python argument keeps its historical name.
                'sinumberingroup': numberinggroup,
                'siinlanguagecode': inlanguagecode
            }
        )

        try:
            return doc['query']
        except KeyError as e:
            raise MalformedResponse(str(e), doc)
|
||||
@@ -0,0 +1,132 @@
|
||||
import logging
|
||||
|
||||
from ...util import none_or
|
||||
from ..errors import MalformedResponse
|
||||
from .collection import Collection
|
||||
|
||||
logger = logging.getLogger("mw.api.collections.user_contribs")
|
||||
|
||||
|
||||
class UserContribs(Collection):
    """
    A collection of revisions indexes by user.
    """

    PROPERTIES = {'ids', 'title', 'timestamp', 'comment', 'parsedcomment',
                  'size', 'sizediff', 'flags', 'patrolled', 'tags'}

    # Includes every filter documented in `query` below.
    SHOW = {'minor', '!minor', 'patrolled', '!patrolled',
            'top', '!top', 'new', '!new'}

    MAX_REVISIONS = 50

    def query(self, *args, limit=None, **kwargs):
        """
        Get a user's revisions.
        See `<https://www.mediawiki.org/wiki/API:Usercontribs>`_

        :Parameters:
            limit : int
                The maximum number of contributions to return.
            start : :class:`mw.Timestamp`
                The start timestamp to return from
            end : :class:`mw.Timestamp`
                The end timestamp to return to
            user : set(str)
                The users to retrieve contributions for.  Maximum number of values 50 (500 for bots)
            userprefix : set(str)
                Retrieve contributions for all users whose names begin with this value.
            direction : str
                "newer" or "older"
            namespace : int
                Only list contributions in these namespaces
            properties :
                Include additional pieces of information

                * ids - Adds the page ID and revision ID
                * title - Adds the title and namespace ID of the page
                * timestamp - Adds the timestamp of the edit
                * comment - Adds the comment of the edit
                * parsedcomment - Adds the parsed comment of the edit
                * size - Adds the new size of the edit
                * sizediff - Adds the size delta of the edit against its parent
                * flags - Adds flags of the edit
                * patrolled - Tags patrolled edits
                * tags - Lists tags for the edit
            show : set(str)
                Show only items that meet these criteria, e.g. non minor edits only: ucshow=!minor.
                NOTE: If ucshow=patrolled or ucshow=!patrolled is set, revisions older than
                $wgRCMaxAge (2592000) won't be shown

                * minor
                * !minor
                * patrolled
                * !patrolled
                * top
                * !top
                * new
                * !new
            tag : str
                Only list revisions tagged with this tag
            toponly : bool
                DEPRECATED! Only list changes which are the latest revision.
                Accepted, but not sent to the API; use show={'top'} instead.

        :Returns:
            An iterator of contribution dicts returned from the API.
        """
        limit = none_or(limit, int)

        revisions_yielded = 0
        done = False
        while not done:

            # Ask only for what is still missing, bounded by the per-request
            # maximum.
            if limit is None:
                kwargs['limit'] = self.MAX_REVISIONS
            else:
                kwargs['limit'] = min(limit - revisions_yielded,
                                      self.MAX_REVISIONS)

            uc_docs, uccontinue = self._query(*args, **kwargs)

            for doc in uc_docs:
                yield doc
                revisions_yielded += 1

                if limit is not None and revisions_yielded >= limit:
                    done = True
                    break

            if uccontinue is None or len(uc_docs) == 0:
                done = True
            else:
                kwargs['uccontinue'] = uccontinue

    def _query(self, user=None, userprefix=None, limit=None, start=None,
               end=None, direction=None, namespace=None, properties=None,
               show=None, tag=None, toponly=None,
               uccontinue=None):
        """
        Performs one API request.

        :Returns:
            A pair `(uc_docs, uccontinue)`: the list of contribution dicts
            and the continuation dict from 'query-continue', or `None` when
            the response has none.

        :Raises:
            MalformedResponse : if an expected key is missing from the response
        """
        params = {
            'action': "query",
            'list': "usercontribs",
            # Request the legacy 'query-continue' block that is read below
            # (same as Revisions._query).
            'rawcontinue': ''
        }
        params['uclimit'] = none_or(limit, int)
        params['ucstart'] = self._check_timestamp(start)
        params['ucend'] = self._check_timestamp(end)
        # Applied after ucstart/ucend so that the continuation values take
        # precedence over the caller's starting point.
        if uccontinue is not None:
            params.update(uccontinue)
        params['ucuser'] = self._items(user, type=str)
        params['ucuserprefix'] = self._items(userprefix, type=str)
        params['ucdir'] = self._check_direction(direction)
        params['ucnamespace'] = none_or(namespace, int)
        params['ucprop'] = self._items(properties, levels=self.PROPERTIES)
        params['ucshow'] = self._items(show, levels=self.SHOW)
        # Previously accepted but silently dropped.
        params['uctag'] = none_or(tag, str)
        # `toponly` is deliberately not sent (deprecated upstream).

        doc = self.session.get(params)
        try:
            if 'query-continue' in doc:
                uccontinue = doc['query-continue']['usercontribs']
            else:
                uccontinue = None

            uc_docs = doc['query']['usercontribs']

            return uc_docs, uccontinue

        except KeyError as e:
            raise MalformedResponse(str(e), doc)
|
||||
@@ -0,0 +1,83 @@
|
||||
import logging
|
||||
|
||||
from ...util import none_or
|
||||
from ..errors import MalformedResponse
|
||||
from .collection import Collection
|
||||
|
||||
logger = logging.getLogger("mw.api.collections.users")
|
||||
|
||||
|
||||
class Users(Collection):
    """
    A collection of information about users
    """

    # Includes every property documented in `query` below.
    PROPERTIES = {'blockinfo', 'implicitgroups', 'groups', 'rights',
                  'registration', 'emailable', 'editcount', 'gender'}

    # NOTE(review): SHOW and MAX_REVISIONS are not used by this class; they
    # are kept because they are publicly visible attributes.
    SHOW = {'minor', '!minor', 'patrolled', '!patrolled'}

    MAX_REVISIONS = 50

    def query(self, *args, **kwargs):
        """
        Get a user's metadata.
        See `<https://www.mediawiki.org/wiki/API:Users>`_

        :Parameters:
            users : set(str)
                The usernames of the users to be retrieved.

            properties : set(str)
                Include additional pieces of information

                * blockinfo - Tags if the user is blocked, by whom, and
                  for what reason
                * groups - Lists all the groups the user(s) belongs to
                * implicitgroups - Lists all the groups a user is automatically
                  a member of
                * rights - Lists all the rights the user(s) has
                * editcount - Adds the user's edit count
                * registration - Adds the user's registration timestamp
                * emailable - Tags if the user can and wants to receive
                  email through [[Special:Emailuser]]
                * gender - Tags the gender of the user. Returns "male",
                  "female", or "unknown"

        :Returns:
            An iterator of user dicts returned from the API.
        """
        done = False
        while not done:

            us_docs, query_continue = self._query(*args, **kwargs)

            for doc in us_docs:
                yield doc

            if query_continue is None or len(us_docs) == 0:
                done = True
            else:
                kwargs['query_continue'] = query_continue

    def _query(self, users, query_continue=None, properties=None):
        """
        Performs one API request.

        :Returns:
            A pair `(us_docs, query_continue)`: the list of user dicts and
            the continuation dict from 'query-continue', or `None` when the
            response has none.

        :Raises:
            MalformedResponse : if an expected key is missing from the response
        """
        params = {
            'action': "query",
            'list': "users",
            # Request the legacy 'query-continue' block that is read below
            # (same as Revisions._query).
            'rawcontinue': ''
        }
        params['ususers'] = self._items(users, type=str)
        params['usprop'] = self._items(properties, levels=self.PROPERTIES)
        if query_continue is not None:
            params.update(query_continue)

        doc = self.session.get(params)
        try:
            if 'query-continue' in doc:
                query_continue = doc['query-continue']['users']
            else:
                query_continue = None

            us_docs = doc['query']['users']

            return us_docs, query_continue

        except KeyError as e:
            raise MalformedResponse(str(e), doc)
|
||||
48
mediawiki_dump_tools/Mediawiki-Utilities/mw/api/errors.py
Normal file
48
mediawiki_dump_tools/Mediawiki-Utilities/mw/api/errors.py
Normal file
@@ -0,0 +1,48 @@
|
||||
class DocError(Exception):
    """
    Base class for errors that keep hold of the API response document
    that brought them about.

    :Parameters:
        message : str
            The exception message.
        doc : dict
            The document returned by the API that brought about this error.
    """

    def __init__(self, message, doc):
        # The document returned by the API that brought about this error.
        self.doc = doc

        super().__init__(message)
|
||||
|
||||
|
||||
class APIError(DocError):
    """
    Raised when an API response document contains an 'error' section.

    :Parameters:
        doc : dict
            The document returned by the API.
    """

    def __init__(self, doc):

        error = doc.get('error', {})
        code = error.get('code')

        # MediaWiki reports the human-readable error text under 'info'.
        # 'message' is still consulted first so that documents which do
        # carry that key behave exactly as before.
        message = error.get('message')
        if message is None:
            message = error.get('info')

        super().__init__("{0}:{1}".format(code, message), doc)

        self.code = code
        """
        The error code returned by the api -- if available.
        """

        self.message = message
        """
        The error message returned by the api -- if available.
        """
|
||||
|
||||
class AuthenticationError(DocError):
    """
    Raised for a login response whose result does not allow the login to
    complete. The exception message is the result code itself.

    :Parameters:
        doc : dict
            The login response; ``doc['login']['result']`` must be present.
    """

    def __init__(self, doc):
        login_result = doc['login']['result']

        super().__init__(login_result, doc)

        # The result code of an authentication attempt.
        self.result = login_result
|
||||
|
||||
|
||||
class MalformedResponse(DocError):
    """
    Raised when a response document lacks a key that was expected.

    :Parameters:
        key
            The expected, but missing key.
        doc : dict
            The document returned by the API.
    """

    def __init__(self, key, doc):
        description = "Expected to find '{0}' in result.".format(key)
        super().__init__(description, doc)

        # The expected, but missing key from the API call.
        self.key = key
|
||||
134
mediawiki_dump_tools/Mediawiki-Utilities/mw/api/session.py
Normal file
134
mediawiki_dump_tools/Mediawiki-Utilities/mw/api/session.py
Normal file
@@ -0,0 +1,134 @@
|
||||
import logging
|
||||
|
||||
from ..util import api
|
||||
from .collections import (DeletedRevisions, Pages, RecentChanges, Revisions,
|
||||
SiteInfo, UserContribs, Users)
|
||||
from .errors import APIError, AuthenticationError, MalformedResponse
|
||||
|
||||
logger = logging.getLogger("mw.api.session")
|
||||
|
||||
DEFAULT_USER_AGENT = "MediaWiki-Utilities"
|
||||
"""
|
||||
The default User-Agent to be sent with requests to the API.
|
||||
"""
|
||||
|
||||
class Session(api.Session):
    """
    Represents a connection to a MediaWiki API.

    Cookies and other session information are preserved.

    :Parameters:
        uri : str
            The base URI for the API to use.  Usually ends in "api.php"
        user_agent : str
            The User-Agent to be sent with requests.  Will raise a warning if
            left to default value.
    """

    def __init__(self, uri, *args, user_agent=DEFAULT_USER_AGENT, **kwargs):
        """
        Constructs a new :class:`Session`.
        """

        if user_agent == DEFAULT_USER_AGENT:
            logger.warning("Sending requests with default User-Agent.  " +
                           "Set 'user_agent' on api.Session to quiet this " +
                           "message.")

        # The User-Agent always wins over any value supplied in 'headers'.
        if 'headers' in kwargs:
            kwargs['headers']['User-Agent'] = str(user_agent)
        else:
            kwargs['headers'] = {'User-Agent': str(user_agent)}

        super().__init__(uri, *args, **kwargs)

        self.pages = Pages(self)
        """
        An instance of :class:`mw.api.Pages`.
        """

        self.revisions = Revisions(self)
        """
        An instance of :class:`mw.api.Revisions`.
        """

        self.recent_changes = RecentChanges(self)
        """
        An instance of :class:`mw.api.RecentChanges`.
        """

        self.site_info = SiteInfo(self)
        """
        An instance of :class:`mw.api.SiteInfo`.
        """

        self.user_contribs = UserContribs(self)
        """
        An instance of :class:`mw.api.UserContribs`.
        """

        self.users = Users(self)
        """
        An instance of :class:`mw.api.Users`.
        """

        self.deleted_revisions = DeletedRevisions(self)
        """
        An instance of :class:`mw.api.DeletedRevisions`.
        """

    def login(self, username, password, token=None):
        """
        Performs a login operation.  This method usually makes two requests to
        API -- one to get a token and one to use the token to log in.  If
        authentication fails, this method will throw an
        :class:`.errors.AuthenticationError`.

        :Parameters:
            username : str
                Your username
            password : str
                Your password
            token : str
                A login token.  When left as None, the first request is used
                to obtain one and the login is retried with it.

        :Returns:
            The response in a json :py:class:`dict`

        :Raises:
            :class:`.errors.AuthenticationError` when the login result is
            neither "Success" nor a first "NeedToken";
            :class:`.errors.MalformedResponse` when the response lacks an
            expected key.
        """

        doc = self.post(
            {
                'action': "login",
                'lgname': username,
                'lgpassword': password,
                'lgtoken': token,  # If None, we'll be getting a token
            }
        )

        try:
            if doc['login']['result'] == "Success":
                return doc
            elif doc['login']['result'] == "NeedToken":

                if token is not None:
                    # Woops.  We've been here before.  Better error out.
                    raise AuthenticationError(doc)
                else:
                    token = doc['login']['token']
                    return self.login(username, password, token=token)
            else:
                raise AuthenticationError(doc)

        except KeyError as e:
            # Python 3 exceptions have no `.message` attribute; str(e) is
            # what the collections pass to MalformedResponse as well.
            raise MalformedResponse(str(e), doc)

    def request(self, type, params, **kwargs):
        """
        Sends a request asking for a JSON response and returns the decoded
        document.

        :Parameters:
            type
                Forwarded unchanged to the parent class' `request`.
            params : dict
                The request parameters.  The caller's mapping is left
                untouched; 'format' is set to "json" on a copy.

        :Returns:
            The decoded response document.

        :Raises:
            :class:`.errors.APIError` when the document contains an 'error'
            key.
        """
        # Work on a copy so the caller's dict is not modified as a side
        # effect of sending the request.
        params = dict(params)
        params['format'] = "json"

        doc = super().request(type, params, **kwargs).json()

        if 'error' in doc:
            raise APIError(doc)

        return doc
|
||||
Reference in New Issue
Block a user