1
0

Initial commit

p#	new file:   runwikiq.sh
This commit is contained in:
2018-06-02 15:32:19 -07:00
commit 72633c193b
202 changed files with 21929 additions and 0 deletions

View File

@@ -0,0 +1,5 @@
from . import errors
from .session import Session
from .collections import Pages, RecentChanges, Revisions, SiteInfo, \
UserContribs, DeletedRevisions

View File

@@ -0,0 +1,7 @@
from .deleted_revisions import DeletedRevisions
from .pages import Pages
from .recent_changes import RecentChanges
from .revisions import Revisions
from .site_info import SiteInfo
from .user_contribs import UserContribs
from .users import Users

View File

@@ -0,0 +1,68 @@
import re
class Collection:
    """
    Represents a collection of items that can be queried via the API.  This is
    an abstract base class that should be extended.
    """

    TIMESTAMP = re.compile(r"[0-9]{4}-?[0-9]{2}-?[0-9]{2}T?" +
                           r"[0-9]{2}:?[0-9]{2}:?[0-9]{2}Z?")
    """
    A regular expression for matching the API's timestamp format.
    """

    DIRECTIONS = {'newer', 'older'}
    """
    A set of potential direction names.
    """

    def __init__(self, session):
        """
        :Parameters:
            session : `mw.api.Session`
                An api session to use for post & get.
        """
        self.session = session

    def _check_direction(self, direction):
        """
        Validate a sort direction.

        :Parameters:
            direction : str
                One of :attr:`DIRECTIONS`, or `None` to leave it unset.

        :Returns:
            `None` if `direction` is `None`, otherwise ``str(direction)``.

        :Raises:
            AssertionError : if the value is not a known direction.
        """
        if direction is None:
            return None

        direction = str(direction)
        # Raised explicitly (rather than via `assert`) so that the check still
        # runs under `python -O`; the exception type is kept for callers.
        if direction not in self.DIRECTIONS:
            raise AssertionError(
                "Direction must be one of {0}".format(self.DIRECTIONS))
        return direction

    def _check_timestamp(self, timestamp):
        """
        Validate a timestamp against :attr:`TIMESTAMP`.

        :Parameters:
            timestamp : object
                Anything whose ``str()`` starts with a MediaWiki-style
                timestamp, or `None` to leave it unset.

        :Returns:
            `None` if `timestamp` is `None`, otherwise ``str(timestamp)``.

        :Raises:
            TypeError : if the string form does not match :attr:`TIMESTAMP`.
        """
        if timestamp is None:
            return None

        timestamp = str(timestamp)
        # NOTE: `match` only anchors at the start of the string, so trailing
        # characters after a valid timestamp are accepted.
        if not self.TIMESTAMP.match(timestamp):
            raise TypeError(
                "{0} is not formatted like ".format(repr(timestamp)) +
                "a MediaWiki timestamp."
            )
        return timestamp

    def _items(self, items, none=True, levels=None, type=lambda val: val):
        """
        Build a ``|``-separated multi-value parameter.

        :Parameters:
            items : iterable
                The values to join.  A bare `str` is treated as a single
                value rather than being split into characters.
            none : bool
                When true, an `items` of `None` is passed through as `None`.
            levels : iterable
                Optional whitelist; every item must be one of these values.
            type : callable
                Applied to each item before it is converted to `str`.

        :Returns:
            `None`, or the distinct items joined with ``|`` in sorted order
            (sorted so that the same input always builds the same string).

        :Raises:
            AssertionError : if an item is not contained in `levels`.
        """
        if none and items is None:
            return None

        if isinstance(items, str):
            # Iterating a str would yield single characters ("Foo" -> F|o).
            items = [items]

        values = {str(type(item)) for item in items}
        if levels is not None:
            allowed = {str(level) for level in levels}
            unknown = values - allowed
            # Explicit raise so that validation survives `python -O`.
            if unknown:
                raise AssertionError(
                    "items {0} not in levels {1}".format(unknown, allowed))

        return "|".join(sorted(values))

View File

@@ -0,0 +1,150 @@
import logging
import sys
from ...types import Timestamp
from ...util import none_or
from ..errors import MalformedResponse
from .collection import Collection
logger = logging.getLogger("mw.api.collections.deletedrevs")


class DeletedRevisions(Collection):
    """
    A collection of deleted revisions, requested with
    ``action=query&prop=deletedrevisions``.
    """

    PROPERTIES = {'ids', 'flags', 'timestamp', 'user', 'userid', 'size',
                  'sha1', 'contentmodel', 'comment', 'parsedcomment',
                  'content', 'tags'}

    # TODO:
    # This is *not* the right way to do this, but it should work for all
    # queries.
    MAX_REVISIONS = 500

    def get(self, rev_id, *args, **kwargs):
        """
        Get a single deleted revision by its ID.

        :Parameters:
            rev_id : int
                Revision ID
            ``*args``
                Accepted for backward compatibility; not forwarded.
            ``**kwargs``
                Passed to :py:meth:`query`

        :Returns:
            A single rev dict

        :Raises:
            KeyError : if no revision with `rev_id` is returned.
        """
        rev_id = int(rev_id)

        revs = list(self.query(revids={rev_id}, **kwargs))
        if not revs:
            raise KeyError(rev_id)
        return revs[0]

    def query(self, *args, limit=sys.maxsize, **kwargs):
        """
        Queries deleted revisions.
        See https://www.mediawiki.org/wiki/API:Deletedrevisions

        :Parameters:
            titles : set(str)
                A set of page names to query (note that namespace prefix is
                expected)
            pageids : set(int)
                A set of page IDs to query
            revids : set(int)
                A set of revision IDs to query
            start : :class:`mw.Timestamp`
                A timestamp to start querying from
            end : :class:`mw.Timestamp`
                A timestamp to end querying
            query_continue : dict
                Continuation parameters from a previous response
            unique : bool
                Accepted, but currently not sent to the API
            tag : str
                Only list revisions tagged with this tag
            user : str
                Only list revisions saved by this user_text
            excludeuser : str
                Do not list revisions saved by this user_text
            namespace : int
                Accepted, but currently not sent to the API
            limit : int
                Limit the total number of results
            direction : str
                "newer" or "older"
            properties : set(str)
                A list of properties to include in the results:

                * ids            - The ID of the revision.
                * flags          - Revision flags (minor).
                * timestamp      - The timestamp of the revision.
                * user           - User that made the revision.
                * userid         - User ID of the revision creator.
                * size           - Length (bytes) of the revision.
                * sha1           - SHA-1 (base 16) of the revision.
                * contentmodel   - Content model ID of the revision.
                * comment        - Comment by the user for the revision.
                * parsedcomment  - Parsed comment by the user for the revision.
                * content        - Text of the revision.
                * tags           - Tags for the revision.

        :Returns:
            An iterator of at most `limit` rev dicts returned from the API.
        """
        revisions_yielded = 0
        done = False
        # Strictly "<": with "<=" one more request was issued after the limit
        # had been reached and `limit + 1` documents were yielded.
        while not done and revisions_yielded < limit:
            # `limit` means something different for _query(): it is the page
            # size of one request, so never ask for more than is still needed.
            kwargs['limit'] = min(limit - revisions_yielded,
                                  self.MAX_REVISIONS)

            rev_docs, query_continue = self._query(*args, **kwargs)

            for doc in rev_docs:
                yield doc
                revisions_yielded += 1
                if revisions_yielded >= limit:
                    break

            if query_continue != "" and len(rev_docs) > 0:
                kwargs['query_continue'] = query_continue
            else:
                done = True

    def _query(self, titles=None, pageids=None, revids=None,
               start=None, end=None, query_continue=None, unique=None,
               tag=None, user=None, excludeuser=None, namespace=None,
               limit=None, properties=None, direction=None):
        """
        Perform one ``prop=deletedrevisions`` request.

        :Returns:
            ``(rev_docs, query_continue)`` -- the revision dicts of all pages
            in the response (each with its page dict under ``'page'``) and the
            response's ``'continue'`` value, or ``''`` when there is none.

        :Raises:
            :class:`~mw.api.errors.MalformedResponse` : if an expected key is
            missing from the response.
        """
        params = {
            'action': "query",
            'prop': "deletedrevisions"
        }

        params['titles'] = self._items(titles)
        params['pageids'] = self._items(pageids)
        params['revids'] = self._items(revids)
        params['drvprop'] = self._items(properties, levels=self.PROPERTIES)
        params['drvlimit'] = none_or(limit, int)
        params['drvstart'] = self._check_timestamp(start)
        params['drvend'] = self._check_timestamp(end)
        params['drvdir'] = self._check_direction(direction)
        params['drvuser'] = none_or(user, str)
        # A user name, not a number: coercing with `int` raised ValueError.
        params['drvexcludeuser'] = none_or(excludeuser, str)
        params['drvtag'] = none_or(tag, str)
        # NOTE(review): `unique` and `namespace` are accepted but never sent.
        params.update(query_continue or {'continue': ""})

        doc = self.session.get(params)

        try:
            if 'continue' in doc:
                query_continue = doc['continue']
            else:
                query_continue = ''

            pages = doc['query']['pages'].values()
            rev_docs = []

            for page_doc in pages:
                # Detach the revisions so that the page dict attached to each
                # revision does not contain the revisions again.
                page_rev_docs = page_doc.pop('deletedrevisions', [])

                for rev_doc in page_rev_docs:
                    rev_doc['page'] = page_doc

                rev_docs.extend(page_rev_docs)

            return rev_docs, query_continue

        except KeyError as e:
            raise MalformedResponse(str(e), doc)

View File

@@ -0,0 +1,50 @@
import logging
from ...util import none_or
from .collection import Collection
logger = logging.getLogger("mw.api.collections.pages")


class Pages(Collection):
    """
    Page-level API actions.  Only an unfinished, private parameter builder
    for ``action=edit`` exists so far.
    """

    def _edit(self, title=None, pageid=None, section=None, sectiontitle=None,
              text=None, token=None, summary=None, minor=None,
              notminor=None, bot=None, basetimestamp=None,
              starttimestamp=None, recreate=None, createonly=None,
              nocreate=None, watch=None, unwatch=None, watchlist=None,
              md5=None, prependtext=None, appendtext=None, undo=None,
              undoafter=None, redirect=None, contentformat=None,
              contentmodel=None, assert_=None, nassert=None,
              captchaword=None, captchaid=None):
        """
        Coerce the arguments of an ``action=edit`` request.

        Work in progress: the parameters are collected but no request is
        sent and nothing is returned.  The arguments from `redirect` onwards
        are not processed yet.
        """
        # Values are evaluated top to bottom, i.e. in the same order in which
        # the parameters were previously assigned one by one.
        params = {
            'action': "edit",
            'title': none_or(title, str),
            'pageid': none_or(pageid, int),
            'section': none_or(section, int, levels={'new'}),
            'sectiontitle': none_or(sectiontitle, str),
            'text': none_or(text, str),
            'token': none_or(token, str),
            'summary': none_or(summary, str),
            'minor': none_or(minor, bool),
            'notminor': none_or(notminor, bool),
            'bot': none_or(bot, bool),
            'basetimestamp': self._check_timestamp(basetimestamp),
            'starttimestamp': self._check_timestamp(starttimestamp),
            'recreate': none_or(recreate, bool),
            'createonly': none_or(createonly, bool),
            'nocreate': none_or(nocreate, bool),
            'watch': none_or(watch, bool),
            'unwatch': none_or(unwatch, bool),
            'watchlist': none_or(watchlist, bool),
            'md5': none_or(md5, str),
            'prependtext': none_or(prependtext, str),
            'appendtext': none_or(appendtext, str),
            'undo': none_or(undo, int),
            'undoafter': none_or(undoafter, int),
        }

        # TODO finish this

View File

@@ -0,0 +1,192 @@
import logging
import re
from ...util import none_or
from ..errors import MalformedResponse
from .collection import Collection
logger = logging.getLogger("mw.api.collections.recent_changes")


class RecentChanges(Collection):
    """
    Recent changes (revisions, page creations, registrations, moves, etc.)
    """

    RCCONTINUE = re.compile(r"([0-9]{4}-[0-9]{2}-[0-9]{2}T" +
                            r"[0-9]{2}:[0-9]{2}:[0-9]{2}Z|" +
                            r"[0-9]{14})" +
                            r"\|[0-9]+")

    # 'parsedcomment' and 'patrolled' are documented by query() and are
    # therefore part of the whitelist.
    PROPERTIES = {'user', 'userid', 'comment', 'parsedcomment', 'timestamp',
                  'title', 'ids', 'sizes', 'redirect', 'patrolled', 'flags',
                  'loginfo', 'tags', 'sha1'}

    # 'unpatrolled' is documented by query() as well.
    SHOW = {'minor', '!minor', 'bot', '!bot', 'anon', '!anon',
            'redirect', '!redirect', 'patrolled', '!patrolled',
            'unpatrolled'}

    TYPES = {'edit', 'external', 'new', 'log'}

    DIRECTIONS = {'newer', 'older'}

    MAX_CHANGES = 50

    def _check_rccontinue(self, rccontinue):
        """
        Validate an ``rccontinue`` value against :attr:`RCCONTINUE`.

        :Returns:
            `rccontinue` unchanged (`None` is passed through).

        :Raises:
            TypeError : if the value does not look like
            ``<timestamp>|<rcid>``.
        """
        if rccontinue is None:
            return None
        elif self.RCCONTINUE.match(rccontinue):
            return rccontinue
        else:
            raise TypeError(
                "rccontinue {0} is not formatted correctly ".format(rccontinue) +
                "'%Y-%m-%dT%H:%M:%SZ|<last_rcid>'"
            )

    def query(self, *args, limit=None, **kwargs):
        """
        Enumerate recent changes.
        See `<https://www.mediawiki.org/wiki/API:Recentchanges>`_

        :Parameters:
            start : :class:`mw.Timestamp`
                The timestamp to start enumerating from
            end : :class:`mw.Timestamp`
                The timestamp to end enumerating
            direction :
                "newer" or "older"
            namespace : int
                Filter log entries to only this namespace(s)
            user : str
                Only list changes by this user
            excludeuser : str
                Don't list changes by this user
            tag : str
                Only list changes tagged with this tag
            properties : set(str)
                Include additional pieces of information

                * user           - Adds the user responsible for the edit and tags if they are an IP
                * userid         - Adds the user id responsible for the edit
                * comment        - Adds the comment for the edit
                * parsedcomment  - Adds the parsed comment for the edit
                * flags          - Adds flags for the edit
                * timestamp      - Adds timestamp of the edit
                * title          - Adds the page title of the edit
                * ids            - Adds the page ID, recent changes ID and the new and old revision ID
                * sizes          - Adds the new and old page length in bytes
                * redirect       - Tags edit if page is a redirect
                * patrolled      - Tags patrollable edits as being patrolled or unpatrolled
                * loginfo        - Adds log information (logid, logtype, etc) to log entries
                * tags           - Lists tags for the entry
                * sha1           - Adds the content checksum for entries associated with a revision
            token : set(str)
                Which tokens to obtain for each change

                * patrol
            show : set(str)
                Show only items that meet this criteria. For example, to see
                only minor edits done by logged-in users, set
                show={'minor', '!anon'}.

                * minor
                * !minor
                * bot
                * !bot
                * anon
                * !anon
                * redirect
                * !redirect
                * patrolled
                * !patrolled
                * unpatrolled
            limit : int
                How many total changes to return
            type : set(str)
                Which types of changes to show

                * edit
                * external
                * new
                * log
            toponly : bool
                Only list changes which are the latest revision
            rccontinue : str
                Use this to continue loading results from where you last left off

        :Returns:
            An iterator of change dicts returned from the API.
        """
        limit = none_or(limit, int)

        changes_yielded = 0
        done = False
        while not done:
            # Page size of the next request: never more than MAX_CHANGES and
            # never more than what is still needed to reach `limit`.
            if limit is None:
                kwargs['limit'] = self.MAX_CHANGES
            else:
                kwargs['limit'] = min(limit - changes_yielded,
                                      self.MAX_CHANGES)

            rc_docs, rccontinue = self._query(*args, **kwargs)

            for doc in rc_docs:
                yield doc
                changes_yielded += 1

                if limit is not None and changes_yielded >= limit:
                    done = True
                    break

            if rccontinue is not None and len(rc_docs) > 0:
                kwargs['rccontinue'] = rccontinue
            else:
                done = True

    def _query(self, start=None, end=None, direction=None, namespace=None,
               user=None, excludeuser=None, tag=None, properties=None,
               token=None, show=None, limit=None, type=None,
               toponly=None, rccontinue=None):
        """
        Perform one ``list=recentchanges`` request.

        :Returns:
            ``(rc_docs, rccontinue)`` -- the change dicts of this response
            and the value to continue from.

        :Raises:
            :class:`~mw.api.errors.MalformedResponse` : if an expected key is
            missing from the response.
        """
        params = {
            'action': "query",
            'list': "recentchanges"
        }

        params['rcstart'] = none_or(start, str)
        params['rcend'] = none_or(end, str)
        params['rcdir'] = self._check_direction(direction)
        params['rcnamespace'] = none_or(namespace, int)
        params['rcuser'] = none_or(user, str)
        params['rcexcludeuser'] = none_or(excludeuser, str)
        params['rctag'] = none_or(tag, str)
        params['rcprop'] = self._items(properties, levels=self.PROPERTIES)
        # Built from `token` (it was mistakenly built from `tag`).  A single
        # token name may be given as a plain str instead of a set.
        if token is None or isinstance(token, str):
            params['rctoken'] = token
        else:
            params['rctoken'] = self._items(token)
        params['rcshow'] = self._items(show, levels=self.SHOW)
        params['rclimit'] = none_or(limit, int)
        # `levels` must be passed by keyword: positionally TYPES ended up in
        # the `none` flag and the types were never validated.
        params['rctype'] = self._items(type, levels=self.TYPES)
        params['rctoponly'] = none_or(toponly, bool)
        params['rccontinue'] = self._check_rccontinue(rccontinue)

        doc = self.session.get(params)

        try:
            rc_docs = doc['query']['recentchanges']

            if 'query-continue' in doc:
                rccontinue = \
                    doc['query-continue']['recentchanges']['rccontinue']
            elif len(rc_docs) > 0:
                # No continuation in the response: derive one from the last
                # change.  This reads its 'timestamp' and 'rcid' keys.
                rccontinue = "|".join([rc_docs[-1]['timestamp'],
                                       str(rc_docs[-1]['rcid'] + 1)])
            else:
                pass  # Leave it be

        except KeyError as e:
            raise MalformedResponse(str(e), doc)

        return rc_docs, rccontinue

View File

@@ -0,0 +1,220 @@
import logging
from ...util import none_or
from ..errors import MalformedResponse
from .collection import Collection
logger = logging.getLogger("mw.api.collections.revisions")


class Revisions(Collection):
    """
    A collection of revisions indexed by title, page_id and user_text.
    Note that revisions of deleted pages are queryable via
    :class:`mw.api.DeletedRevisions`.
    """

    PROPERTIES = {'ids', 'flags', 'timestamp', 'user', 'userid', 'size',
                  'sha1', 'contentmodel', 'comment', 'parsedcomment',
                  'content', 'tags', 'flagged'}

    DIFF_TO = {'prev', 'next', 'cur'}

    # This is *not* the right way to do this, but it should work for all
    # queries.
    MAX_REVISIONS = 50

    def get(self, rev_id, **kwargs):
        """
        Get a single revision based on its ID.  Throws a :py:class:`KeyError`
        if the rev_id cannot be found.

        :Parameters:
            rev_id : int
                Revision ID
            ``**kwargs``
                Passed to :py:meth:`query`

        :Returns:
            A single rev dict
        """
        rev_id = int(rev_id)

        revs = list(self.query(revids={rev_id}, **kwargs))

        if not revs:
            raise KeyError(rev_id)
        return revs[0]

    def query(self, *args, limit=None, **kwargs):
        """
        Get revision information.
        See `<https://www.mediawiki.org/wiki/API:Properties#revisions_.2F_rv>`_

        :Parameters:
            properties : set(str)
                Which properties to get for each revision:

                * ids            - The ID of the revision
                * flags          - Revision flags (minor)
                * timestamp      - The timestamp of the revision
                * user           - User that made the revision
                * userid         - User id of revision creator
                * size           - Length (bytes) of the revision
                * sha1           - SHA-1 (base 16) of the revision
                * contentmodel   - Content model id
                * comment        - Comment by the user for revision
                * parsedcomment  - Parsed comment by the user for the revision
                * content        - Text of the revision
                * tags           - Tags for the revision
            limit : int
                Limit how many revisions will be returned
                No more than 500 (5000 for bots) allowed
            start_id : int
                From which revision id to start enumeration (enum)
            end_id : int
                Stop revision enumeration on this revid
            start : :class:`mw.Timestamp`
                From which revision timestamp to start enumeration (enum)
            end : :class:`mw.Timestamp`
                Enumerate up to this timestamp
            direction : str
                "newer" or "older"
            user : str
                Only include revisions made by user_text
            excludeuser : str
                Exclude revisions made by this user_text
            tag : str
                Only list revisions tagged with this tag
            expandtemplates : bool
                Expand templates in revision content (requires "content" property)
            generatexml : bool
                Generate XML parse tree for revision content (requires "content" property)
            parse : bool
                Parse revision content (requires "content" property)
            section : int
                Only retrieve the content of this section number
            token : set(str)
                Which tokens to obtain for each revision

                * rollback - See `<https://www.mediawiki.org/wiki/API:Edit_-_Rollback#Token>`_
            rvcontinue : str
                When more results are available, use this to continue
            diffto : int
                Revision ID to diff each revision to. Use "prev", "next" and
                "cur" for the previous, next and current revision respectively
            difftotext : str
                Text to diff each revision to. Only diffs a limited number of
                revisions. Overrides diffto. If section is set, only that
                section will be diffed against this text
            contentformat : str
                Serialization format used for difftotext and expected for output of content

                * text/x-wiki
                * text/javascript
                * text/css
                * text/plain
                * application/json

        :Returns:
            An iterator of rev dicts returned from the API.
        """
        revisions_yielded = 0
        done = False
        while not done:
            # Page size of the next request: never more than MAX_REVISIONS
            # and never more than what is still needed to reach `limit`.
            if limit is None:
                kwargs['limit'] = self.MAX_REVISIONS
            else:
                kwargs['limit'] = min(limit - revisions_yielded,
                                      self.MAX_REVISIONS)

            rev_docs, rvcontinue = self._query(*args, **kwargs)

            for doc in rev_docs:
                yield doc
                revisions_yielded += 1

                if limit is not None and revisions_yielded >= limit:
                    done = True
                    break

            if rvcontinue is not None and len(rev_docs) > 0:
                kwargs['rvcontinue'] = rvcontinue
            else:
                done = True

    def _query(self, revids=None, titles=None, pageids=None, properties=None,
               limit=None, start_id=None, end_id=None, start=None,
               end=None, direction=None, user=None, excludeuser=None,
               tag=None, expandtemplates=None, generatexml=None,
               parse=None, section=None, token=None, rvcontinue=None,
               diffto=None, difftotext=None, contentformat=None):
        """
        Perform one ``prop=revisions`` request.

        :Returns:
            ``(rev_docs, rvcontinue)`` -- the revision dicts of all pages in
            the response (each with its page dict under ``'page'``) and the
            continuation value, or `None` when there is none.

        :Raises:
            :class:`~mw.api.errors.MalformedResponse` : if an expected key is
            missing from the response.
        """
        params = {
            'action': "query",
            'prop': "revisions",
            'rawcontinue': ''
        }

        params['revids'] = self._items(revids, type=int)
        params['titles'] = self._items(titles)
        params['pageids'] = self._items(pageids, type=int)

        params['rvprop'] = self._items(properties, levels=self.PROPERTIES)

        if revids is None:  # Can't have a limit unless revids is none
            params['rvlimit'] = none_or(limit, int)

        params['rvstartid'] = none_or(start_id, int)
        params['rvendid'] = none_or(end_id, int)
        params['rvstart'] = self._check_timestamp(start)
        params['rvend'] = self._check_timestamp(end)

        params['rvdir'] = self._check_direction(direction)
        params['rvuser'] = none_or(user, str)
        # A user name, not a number: coercing with `int` raised ValueError.
        params['rvexcludeuser'] = none_or(excludeuser, str)
        params['rvtag'] = none_or(tag, str)
        params['rvexpandtemplates'] = none_or(expandtemplates, bool)
        params['rvgeneratexml'] = none_or(generatexml, bool)
        params['rvparse'] = none_or(parse, bool)
        params['rvsection'] = none_or(section, int)
        params['rvtoken'] = none_or(token, str)
        params['rvcontinue'] = none_or(rvcontinue, str)
        params['rvdiffto'] = self._check_diffto(diffto)
        params['rvdifftotext'] = none_or(difftotext, str)
        params['rvcontentformat'] = none_or(contentformat, str)

        doc = self.session.get(params)

        try:
            if 'query-continue' in doc:
                rvcontinue = doc['query-continue']['revisions']['rvcontinue']
            else:
                rvcontinue = None

            pages = doc['query'].get('pages', {}).values()
            rev_docs = []

            for page_doc in pages:
                if 'missing' in page_doc or 'revisions' not in page_doc:
                    continue

                # Detach the revisions so that the page dict attached to each
                # revision does not contain the revisions again.
                page_rev_docs = page_doc.pop('revisions')

                for rev_doc in page_rev_docs:
                    rev_doc['page'] = page_doc

                rev_docs.extend(page_rev_docs)

            return rev_docs, rvcontinue

        except KeyError as e:
            raise MalformedResponse(str(e), doc)

    def _check_diffto(self, diffto):
        """
        Normalise a ``diffto`` value: `None` and the names in
        :attr:`DIFF_TO` are passed through, anything else is coerced to
        `int` (a revision ID).
        """
        if diffto is None or diffto in self.DIFF_TO:
            return diffto
        return int(diffto)

View File

@@ -0,0 +1,81 @@
import logging
from ..errors import MalformedResponse
from .collection import Collection
logger = logging.getLogger("mw.api.collections.site_info")


class SiteInfo(Collection):
    """
    General information about the site.
    """

    # 'restrictions' and 'defaultoptions' are documented by query() and are
    # therefore part of the whitelist.
    PROPERTIES = {'general', 'namespaces', 'namespacealiases',
                  'specialpagealiases', 'magicwords', 'interwikimap',
                  'dbrepllag', 'statistics', 'usergroups', 'extensions',
                  'fileextensions', 'rightsinfo', 'restrictions',
                  'languages', 'skins', 'extensiontags', 'functionhooks',
                  'showhooks', 'variables', 'protocols', 'defaultoptions'}

    FILTERIW = {'local', '!local'}

    def query(self, properties=None, filteriw=None, showalldb=None,
              numberinggroup=None, inlanguagecode=None):
        """
        General information about the site.
        See `<https://www.mediawiki.org/wiki/API:Meta#siteinfo_.2F_si>`_

        :Parameters:
            properties: set(str)
                Which sysinfo properties to get:

                * general               - Overall system information
                * namespaces            - List of registered namespaces and their canonical names
                * namespacealiases      - List of registered namespace aliases
                * specialpagealiases    - List of special page aliases
                * magicwords            - List of magic words and their aliases
                * statistics            - Returns site statistics
                * interwikimap          - Returns interwiki map (optionally filtered, (optionally localised by using siinlanguagecode))
                * dbrepllag             - Returns database server with the highest replication lag
                * usergroups            - Returns user groups and the associated permissions
                * extensions            - Returns extensions installed on the wiki
                * fileextensions        - Returns list of file extensions allowed to be uploaded
                * rightsinfo            - Returns wiki rights (license) information if available
                * restrictions          - Returns information on available restriction (protection) types
                * languages             - Returns a list of languages MediaWiki supports (optionally localised by using siinlanguagecode)
                * skins                 - Returns a list of all enabled skins
                * extensiontags         - Returns a list of parser extension tags
                * functionhooks         - Returns a list of parser function hooks
                * showhooks             - Returns a list of all subscribed hooks (contents of $wgHooks)
                * variables             - Returns a list of variable IDs
                * protocols             - Returns a list of protocols that are allowed in external links.
                * defaultoptions        - Returns the default values for user preferences.
            filteriw : str
                "local" or "!local" Return only local or only nonlocal entries of the interwiki map
            showalldb : bool
                List all database servers, not just the one lagging the most
            numberinggroup : bool
                Lists the number of users in user groups
            inlanguagecode : str
                Language code for localised language names (best effort, use CLDR extension)

        :Returns:
            The ``'query'`` part of the API response.

        :Raises:
            :class:`~mw.api.errors.MalformedResponse` : if the response has
            no ``'query'`` key.
        """
        siprop = self._items(properties, levels=self.PROPERTIES)

        # The remaining arguments are sent exactly as given, without
        # coercion or validation.
        doc = self.session.get(
            {
                'action': "query",
                'meta': "siteinfo",
                'siprop': siprop,
                'sifilteriw': filteriw,
                'sishowalldb': showalldb,
                'sinumberinggroup': numberinggroup,
                'siinlanguagecode': inlanguagecode
            }
        )

        try:
            return doc['query']
        except KeyError as e:
            raise MalformedResponse(str(e), doc)

View File

@@ -0,0 +1,132 @@
import logging
from ...util import none_or
from ..errors import MalformedResponse
from .collection import Collection
logger = logging.getLogger("mw.api.collections.user_contribs")


class UserContribs(Collection):
    """
    A collection of revisions indexed by user.
    """

    PROPERTIES = {'ids', 'title', 'timestamp', 'comment', 'parsedcomment',
                  'size', 'sizediff', 'flags', 'patrolled', 'tags'}

    # 'top', '!top', 'new' and '!new' are documented by query() and are
    # therefore part of the whitelist.
    SHOW = {'minor', '!minor', 'patrolled', '!patrolled',
            'top', '!top', 'new', '!new'}

    MAX_REVISIONS = 50

    def query(self, *args, limit=None, **kwargs):
        """
        Get a user's revisions.
        See `<https://www.mediawiki.org/wiki/API:Usercontribs>`_

        :Parameters:
            limit : int
                The maximum number of contributions to return.
            start : :class:`mw.Timestamp`
                The start timestamp to return from
            end : :class:`mw.Timestamp`
                The end timestamp to return to
            user : set(str)
                The users to retrieve contributions for.  Maximum number of values 50 (500 for bots)
            userprefix : set(str)
                Retrieve contributions for all users whose names begin with this value.
            direction : str
                "newer" or "older"
            namespace : int
                Only list contributions in these namespaces
            properties :
                Include additional pieces of information

                * ids            - Adds the page ID and revision ID
                * title          - Adds the title and namespace ID of the page
                * timestamp      - Adds the timestamp of the edit
                * comment        - Adds the comment of the edit
                * parsedcomment  - Adds the parsed comment of the edit
                * size           - Adds the new size of the edit
                * sizediff       - Adds the size delta of the edit against its parent
                * flags          - Adds flags of the edit
                * patrolled      - Tags patrolled edits
                * tags           - Lists tags for the edit
            show : set(str)
                Show only items that meet these criteria, e.g. non minor edits only: ucshow=!minor.
                NOTE: If ucshow=patrolled or ucshow=!patrolled is set, revisions older than
                $wgRCMaxAge (2592000) won't be shown

                * minor
                * !minor
                * patrolled
                * !patrolled
                * top
                * !top
                * new
                * !new
            tag : str
                Only list revisions tagged with this tag
            toponly : bool
                DEPRECATED! Only list changes which are the latest revision

        :Returns:
            An iterator of contribution dicts returned from the API.
        """
        limit = none_or(limit, int)

        revisions_yielded = 0
        done = False
        while not done:
            # Page size of the next request: never more than MAX_REVISIONS
            # and never more than what is still needed to reach `limit`.
            if limit is None:
                kwargs['limit'] = self.MAX_REVISIONS
            else:
                kwargs['limit'] = min(limit - revisions_yielded,
                                      self.MAX_REVISIONS)

            uc_docs, uccontinue = self._query(*args, **kwargs)

            for doc in uc_docs:
                yield doc
                revisions_yielded += 1

                if limit is not None and revisions_yielded >= limit:
                    done = True
                    break

            if uccontinue is None or len(uc_docs) == 0:
                done = True
            else:
                kwargs['uccontinue'] = uccontinue

    def _query(self, user=None, userprefix=None, limit=None, start=None,
               end=None, direction=None, namespace=None, properties=None,
               show=None, tag=None, toponly=None,
               uccontinue=None):
        """
        Perform one ``list=usercontribs`` request.

        :Returns:
            ``(uc_docs, uccontinue)`` -- the contribution dicts of this
            response and the ``query-continue`` dict for the next request,
            or `None` when there is none.

        :Raises:
            :class:`~mw.api.errors.MalformedResponse` : if an expected key is
            missing from the response.
        """
        params = {
            'action': "query",
            'list': "usercontribs"
        }
        params['uclimit'] = none_or(limit, int)
        params['ucstart'] = self._check_timestamp(start)
        params['ucend'] = self._check_timestamp(end)
        if uccontinue is not None:
            # `uccontinue` is the dict taken from 'query-continue' below.
            params.update(uccontinue)
        params['ucuser'] = self._items(user, type=str)
        params['ucuserprefix'] = self._items(userprefix, type=str)
        params['ucdir'] = self._check_direction(direction)
        params['ucnamespace'] = none_or(namespace, int)
        params['ucprop'] = self._items(properties, levels=self.PROPERTIES)
        params['ucshow'] = self._items(show, levels=self.SHOW)
        # `tag` and `toponly` used to be accepted but were silently dropped.
        params['uctag'] = none_or(tag, str)
        params['uctoponly'] = none_or(toponly, bool)

        doc = self.session.get(params)
        try:
            if 'query-continue' in doc:
                uccontinue = doc['query-continue']['usercontribs']
            else:
                uccontinue = None

            uc_docs = doc['query']['usercontribs']

            return uc_docs, uccontinue

        except KeyError as e:
            raise MalformedResponse(str(e), doc)

View File

@@ -0,0 +1,83 @@
import logging
from ...util import none_or
from ..errors import MalformedResponse
from .collection import Collection
logger = logging.getLogger("mw.api.collections.users")


class Users(Collection):
    """
    A collection of information about users
    """

    # 'rights' is documented by query() and is therefore part of the
    # whitelist.
    PROPERTIES = {'blockinfo', 'implicitgroups', 'groups', 'registration',
                  'emailable', 'editcount', 'gender', 'rights'}

    # NOTE(review): SHOW and MAX_REVISIONS are not used by this class; they
    # are kept because they are public class attributes.
    SHOW = {'minor', '!minor', 'patrolled', '!patrolled'}

    MAX_REVISIONS = 50

    def query(self, *args, **kwargs):
        """
        Get a user's metadata.
        See `<https://www.mediawiki.org/wiki/API:Users>`_

        :Parameters:
            users : set(str)
                The usernames of the users to be retrieved.
            properties : set(str)
                Include additional pieces of information

                * blockinfo      - Tags if the user is blocked, by whom, and
                  for what reason
                * groups         - Lists all the groups the user(s) belongs to
                * implicitgroups - Lists all the groups a user is automatically
                  a member of
                * rights         - Lists all the rights the user(s) has
                * editcount      - Adds the user's edit count
                * registration   - Adds the user's registration timestamp
                * emailable      - Tags if the user can and wants to receive
                  email through [[Special:Emailuser]]
                * gender         - Tags the gender of the user. Returns "male",
                  "female", or "unknown"

        :Returns:
            An iterator of user dicts returned from the API.
        """
        done = False
        while not done:
            us_docs, query_continue = self._query(*args, **kwargs)

            for doc in us_docs:
                yield doc

            if query_continue is None or len(us_docs) == 0:
                done = True
            else:
                kwargs['query_continue'] = query_continue

    def _query(self, users, query_continue=None, properties=None):
        """
        Perform one ``list=users`` request.

        :Returns:
            ``(us_docs, query_continue)`` -- the user dicts of this response
            and the ``query-continue`` dict for the next request, or `None`
            when there is none.

        :Raises:
            :class:`~mw.api.errors.MalformedResponse` : if an expected key is
            missing from the response.
        """
        params = {
            'action': "query",
            'list': "users"
        }
        params['ususers'] = self._items(users, type=str)
        params['usprop'] = self._items(properties, levels=self.PROPERTIES)
        if query_continue is not None:
            # `query_continue` is the dict taken from 'query-continue' below.
            params.update(query_continue)

        doc = self.session.get(params)
        try:
            if 'query-continue' in doc:
                query_continue = doc['query-continue']['users']
            else:
                query_continue = None

            us_docs = doc['query']['users']

            return us_docs, query_continue

        except KeyError as e:
            raise MalformedResponse(str(e), doc)

View File

@@ -0,0 +1,48 @@
class DocError(Exception):
    """
    Base class for errors that carry the API document they were raised for.

    :Parameters:
        message : str
            The exception message.
        doc : dict
            The document to keep on the exception.
    """

    def __init__(self, message, doc):
        Exception.__init__(self, message)

        self.doc = doc
        """
        The document returned by the API that brought about this error.
        """
class APIError(DocError):
    """
    Raised for a response document that contains an ``'error'`` entry.

    :Parameters:
        doc : dict
            The response document.
    """

    def __init__(self, doc):
        error = doc.get('error', {})
        code = error.get('code')
        # MediaWiki puts the human-readable text under 'info'; fall back to
        # it so that the message is not `None` for ordinary API errors.
        message = error.get('message')
        if message is None:
            message = error.get('info')

        super().__init__("{0}:{1}".format(code, message), doc)

        self.code = code
        """
        The error code returned by the api -- if available.
        """

        self.message = message
        """
        The error message returned by the api -- if available.
        """
class AuthenticationError(DocError):
    """
    Raised for a login response whose result is not a success.

    :Parameters:
        doc : dict
            The login response; ``doc['login']['result']`` must be present.
    """

    def __init__(self, doc):
        login_result = doc['login']['result']
        super().__init__(login_result, doc)

        self.result = login_result
        """
        The result code of an authentication attempt.
        """
class MalformedResponse(DocError):
    """
    Raised when a response lacks a key that the client needs.

    :Parameters:
        key : str
            The key that was looked up.
        doc : dict
            The response document.
    """

    def __init__(self, key, doc):
        description = "Expected to find '{0}' in result.".format(key)
        super().__init__(description, doc)

        self.key = key
        """
        The expected, but missing key from the API call.
        """

View File

@@ -0,0 +1,134 @@
import logging
from ..util import api
from .collections import (DeletedRevisions, Pages, RecentChanges, Revisions,
SiteInfo, UserContribs, Users)
from .errors import APIError, AuthenticationError, MalformedResponse
logger = logging.getLogger("mw.api.session")

DEFAULT_USER_AGENT = "MediaWiki-Utilities"
"""
The default User-Agent to be sent with requests to the API.
"""


class Session(api.Session):
    """
    Represents a connection to a MediaWiki API.

    Cookies and other session information is preserved.

    :Parameters:
        uri : str
            The base URI for the API to use.  Usually ends in "api.php"
        user_agent : str
            The User-Agent to be sent with requests.  Will raise a warning if
            left to default value.
    """

    def __init__(self, uri, *args, user_agent=DEFAULT_USER_AGENT, **kwargs):
        """
        Constructs a new :class:`Session`.
        """

        if user_agent == DEFAULT_USER_AGENT:
            logger.warning("Sending requests with default User-Agent.  " +
                           "Set 'user_agent' on api.Session to quiet this " +
                           "message.")

        # Work on a copy so that a `headers` dict passed in by the caller is
        # not modified.
        headers = dict(kwargs.get('headers') or {})
        headers['User-Agent'] = str(user_agent)
        kwargs['headers'] = headers

        super().__init__(uri, *args, **kwargs)

        self.pages = Pages(self)
        """
        An instance of :class:`mw.api.Pages`.
        """

        self.revisions = Revisions(self)
        """
        An instance of :class:`mw.api.Revisions`.
        """

        self.recent_changes = RecentChanges(self)
        """
        An instance of :class:`mw.api.RecentChanges`.
        """

        self.site_info = SiteInfo(self)
        """
        An instance of :class:`mw.api.SiteInfo`.
        """

        self.user_contribs = UserContribs(self)
        """
        An instance of :class:`mw.api.UserContribs`.
        """

        self.users = Users(self)
        """
        An instance of :class:`mw.api.Users`.
        """

        self.deleted_revisions = DeletedRevisions(self)
        """
        An instance of :class:`mw.api.DeletedRevisions`.
        """

    def login(self, username, password, token=None):
        """
        Performs a login operation.  This method usually makes two requests to
        API -- one to get a token and one to use the token to log in.  If
        authentication fails, this method will throw an
        :class:`.errors.AuthenticationError`.

        :Parameters:
            username : str
                Your username
            password : str
                Your password
            token : str
                A login token.  Leave as `None`; it is filled in by the
                second request this method makes.

        :Returns:
            The response in a json :py:class:`dict`

        :Raises:
            :class:`.errors.AuthenticationError` : if the login is refused.
            :class:`.errors.MalformedResponse` : if the response lacks an
            expected key.
        """

        doc = self.post(
            {
                'action': "login",
                'lgname': username,
                'lgpassword': password,
                'lgtoken': token,  # If None, we'll be getting a token
            }
        )

        try:
            if doc['login']['result'] == "Success":
                return doc
            elif doc['login']['result'] == "NeedToken":

                if token is not None:
                    # Woops.  We've been here before.  Better error out.
                    raise AuthenticationError(doc)
                else:
                    token = doc['login']['token']
                    return self.login(username, password, token=token)
            else:
                raise AuthenticationError(doc)

        except KeyError as e:
            # KeyError has no `.message` attribute on Python 3; using it
            # raised AttributeError instead of MalformedResponse.
            raise MalformedResponse(str(e), doc)

    def request(self, type, params, **kwargs):
        """
        Send a request with ``format=json`` and return the decoded document.

        :Parameters:
            type : str
                Passed through to the parent class' ``request``.
            params : dict
                The API parameters.  The dict itself is not modified.
            ``**kwargs``
                Passed through to the parent class' ``request``.

        :Returns:
            The response in a json :py:class:`dict`

        :Raises:
            :class:`.errors.APIError` : if the document contains an
            ``'error'`` entry.
        """
        # Copy before adding 'format' so the caller's dict stays untouched.
        params = dict(params)
        params['format'] = "json"

        doc = super().request(type, params, **kwargs).json()

        if 'error' in doc:
            raise APIError(doc)

        return doc