Initial commit

p# new file: runwikiq.sh
2018-06-02 15:32:19 -07:00
commit 72633c193b
202 changed files with 21929 additions and 0 deletions
--- a/mediawiki_dump_tools/Mediawiki-Utilities/mw/api/init.py
+++ b/mediawiki_dump_tools/Mediawiki-Utilities/mw/api/init.py
@@ -0,0 +1,5 @@
+from . import errors
+from .session import Session
+
+from .collections import Pages, RecentChanges, Revisions, SiteInfo, \
+    UserContribs, DeletedRevisions
--- a/mediawiki_dump_tools/Mediawiki-Utilities/mw/api/collections/init.py
+++ b/mediawiki_dump_tools/Mediawiki-Utilities/mw/api/collections/init.py
@@ -0,0 +1,7 @@
+from .deleted_revisions import DeletedRevisions
+from .pages import Pages
+from .recent_changes import RecentChanges
+from .revisions import Revisions
+from .site_info import SiteInfo
+from .user_contribs import UserContribs
+from .users import Users
--- a/mediawiki_dump_tools/Mediawiki-Utilities/mw/api/collections/collection.py
+++ b/mediawiki_dump_tools/Mediawiki-Utilities/mw/api/collections/collection.py
@@ -0,0 +1,68 @@
+import re
+
+
+class Collection:
+    """
+    Represents a collection of items that can be queried via the API.  This is
+    an abstract base class that should be extended.
+    """
+
+    TIMESTAMP = re.compile(r"[0-9]{4}-?[0-9]{2}-?[0-9]{2}T?" +
+                           r"[0-9]{2}:?[0-9]{2}:?[0-9]{2}Z?")
+    """
+    A regular expression for matching the API's timestamp format.
+    """
+
+    DIRECTIONS = {'newer', 'older'}
+    """
+    A set of potential direction names.
+    """
+
+    def __init__(self, session):
+        """
+        :Parameters:
+            session : `mw.api.Session`
+                An api session to use for post & get.
+        """
+        self.session = session
+
+    def _check_direction(self, direction):
+        """
+        Normalizes a direction to `str` and validates it against
+        :attr:`DIRECTIONS`.
+
+        :Parameters:
+            direction : str | None
+                A direction name, or None to leave the direction unspecified
+
+        :Returns:
+            None when `direction` is None, otherwise the direction as a str
+
+        :Raises:
+            AssertionError : if the direction is not in :attr:`DIRECTIONS`
+        """
+        if direction is None:
+            return None
+
+        direction = str(direction)
+
+        if direction not in self.DIRECTIONS:
+            # Raised explicitly instead of via `assert` so that the check is
+            # not stripped when Python runs with -O.  The exception type is
+            # kept for callers that already catch AssertionError.
+            raise AssertionError(
+                "Direction must be one of {0}".format(self.DIRECTIONS))
+
+        return direction
+
+    def _check_timestamp(self, timestamp):
+        """
+        Normalizes a timestamp to `str` and checks that it starts with
+        something that looks like a MediaWiki timestamp (see
+        :attr:`TIMESTAMP`).
+
+        :Parameters:
+            timestamp : any | None
+                A value whose `str()` is a timestamp, or None
+
+        :Returns:
+            None when `timestamp` is None, otherwise the timestamp as a str
+
+        :Raises:
+            TypeError : if the string does not match :attr:`TIMESTAMP`
+        """
+        if timestamp is None:
+            return None
+
+        timestamp = str(timestamp)
+
+        if not self.TIMESTAMP.match(timestamp):
+            raise TypeError(
+                "{0} is not formatted like ".format(repr(timestamp)) +
+                "a MediaWiki timestamp."
+            )
+
+        return timestamp
+
+    def _items(self, items, none=True, levels=None, type=lambda val: val):
+        """
+        Converts an iterable of values into the pipe-separated string used
+        for multi-value API parameters.
+
+        :Parameters:
+            items : iterable | None
+                The values to join.  Duplicates are collapsed.
+            none : bool
+                When True, `items=None` is passed through as None
+            levels : iterable | None
+                When given, every item must be one of these values
+            type : callable
+                Applied to each item before it is converted to `str`
+
+        :Returns:
+            None (see `none`) or a "|"-joined str of the sorted, unique items
+
+        :Raises:
+            AssertionError : if an item is not in `levels`
+        """
+        if none and items is None:
+            return None
+
+        items = {str(type(item)) for item in items}
+
+        if levels is not None:
+            levels = {str(level) for level in levels}
+            unknown = items - levels
+
+            if unknown:
+                # Explicit raise so the check survives `python -O`.
+                raise AssertionError(
+                    "items {0} not in levels {1}".format(unknown, levels))
+
+        # Sorted so that the same input always produces the same request
+        # parameter; set iteration order varies between interpreter runs.
+        return "|".join(sorted(items))
--- a/mediawiki_dump_tools/Mediawiki-Utilities/mw/api/collections/deleted_revisions.py
+++ b/mediawiki_dump_tools/Mediawiki-Utilities/mw/api/collections/deleted_revisions.py
@@ -0,0 +1,150 @@
+import logging
+import sys
+
+from ...types import Timestamp
+from ...util import none_or
+from ..errors import MalformedResponse
+from .collection import Collection
+
+logger = logging.getLogger("mw.api.collections.deletedrevs")
+
+
+class DeletedRevisions(Collection):
+    """
+    A collection of deleted revisions, queried via `prop=deletedrevisions`.
+    """
+
+    PROPERTIES = {'ids', 'flags', 'timestamp', 'user', 'userid', 'size',
+                  'sha1', 'contentmodel', 'comment', 'parsedcomment', 'content',
+                  'tags'}
+
+    # TODO:
+    # This is *not* the right way to do this, but it should work for all queries.
+    MAX_REVISIONS = 500
+
+    def get(self, rev_id, *args, **kwargs):
+        """
+        Gets a single deleted revision by its ID.
+
+        :Parameters:
+            rev_id : int
+                Revision ID
+            ``*args``
+                Accepted for backward compatibility; ignored
+            ``**kwargs``
+                Passed to :py:meth:`query`
+
+        :Returns:
+            The first rev dict returned for `rev_id`
+
+        :Raises:
+            KeyError : if no revision is returned for `rev_id`
+        """
+        rev_id = int(rev_id)
+
+        revs = list(self.query(revids={rev_id}, **kwargs))
+
+        if len(revs) < 1:
+            raise KeyError(rev_id)
+        else:
+            return revs[0]
+
+    def query(self, *args, limit=sys.maxsize, **kwargs):
+        """
+        Queries deleted revisions, following continuation until `limit`
+        revisions have been yielded or the results are exhausted.
+        See https://www.mediawiki.org/wiki/API:Deletedrevisions
+
+        :Parameters:
+            titles : set(str)
+                A set of page names to query (note that namespace prefix is expected)
+            pageids : set(int)
+                A set of page IDs to query
+            revids : set(int)
+                A set of revision IDs to query
+            start : :class:`mw.Timestamp`
+                A timestamp to start querying from
+            end : :class:`mw.Timestamp`
+                A timestamp to end querying
+            query_continue : dict
+                Continuation parameters from a previous response
+            unique : bool
+                Accepted, but currently not sent to the API
+            tag : str
+                Only list revision tagged with this tag
+            user : str
+                Only list revisions saved by this user_text
+            excludeuser : str
+                Do not list revision saved by this user_text
+            namespace : int
+                Accepted, but currently not sent to the API
+            limit : int
+                Limit the total number of results yielded
+            direction : str
+                "newer" or "older"
+            properties : set(str)
+                A list of properties to include in the results:
+
+
+                * ids            - The ID of the revision.
+                * flags          - Revision flags (minor).
+                * timestamp      - The timestamp of the revision.
+                * user           - User that made the revision.
+                * userid         - User ID of the revision creator.
+                * size           - Length (bytes) of the revision.
+                * sha1           - SHA-1 (base 16) of the revision.
+                * contentmodel   - Content model ID of the revision.
+                * comment        - Comment by the user for the revision.
+                * parsedcomment  - Parsed comment by the user for the revision.
+                * content        - Text of the revision.
+                * tags           - Tags for the revision.
+
+        :Returns:
+            An iterator of rev dicts returned from the API.
+        """
+        revisions_yielded = 0
+        done = False
+        # Strictly less-than: once `limit` rows were yielded there must be no
+        # further request (the old `<=` fetched and yielded one extra row).
+        while not done and revisions_yielded < limit:
+            # `limit` is the caller's total; the per-request limit is capped
+            # by MAX_REVISIONS and by what is still missing.
+            kwargs['limit'] = min(limit - revisions_yielded,
+                                  self.MAX_REVISIONS)
+
+            rev_docs, query_continue = self._query(*args, **kwargs)
+            for doc in rev_docs:
+                yield doc
+                revisions_yielded += 1
+                if revisions_yielded >= limit:
+                    break
+
+            if query_continue != "" and len(rev_docs) > 0:
+                kwargs['query_continue'] = query_continue
+            else:
+                done = True
+
+    def _query(self, titles=None, pageids=None, revids=None,
+               start=None, end=None, query_continue=None, unique=None, tag=None,
+               user=None, excludeuser=None, namespace=None, limit=None,
+               properties=None, direction=None):
+        """
+        Performs a single `prop=deletedrevisions` request.
+
+        :Returns:
+            A pair `(rev_docs, query_continue)`.  `rev_docs` is a list of rev
+            dicts, each with its page dict attached under 'page'.
+            `query_continue` is the response's 'continue' value, or '' when
+            the response has none.
+
+        :Raises:
+            MalformedResponse : if the response lacks an expected key
+        """
+        params = {
+            'action': "query",
+            'prop': "deletedrevisions"
+        }
+
+        params['titles'] = self._items(titles)
+        params['pageids'] = self._items(pageids)
+        params['revids'] = self._items(revids)
+        params['drvprop'] = self._items(properties, levels=self.PROPERTIES)
+        params['drvlimit'] = none_or(limit, int)
+        params['drvstart'] = self._check_timestamp(start)
+        params['drvend'] = self._check_timestamp(end)
+
+        params['drvdir'] = self._check_direction(direction)
+        params['drvuser'] = none_or(user, str)
+        # A user name, not a number: casting with int() raised ValueError for
+        # ordinary user names.
+        params['drvexcludeuser'] = none_or(excludeuser, str)
+        params['drvtag'] = none_or(tag, str)
+        params.update(query_continue or {'continue': ""})
+
+        doc = self.session.get(params)
+
+        try:
+            if 'continue' in doc:
+                query_continue = doc['continue']
+            else:
+                query_continue = ''
+
+            pages = doc['query']['pages'].values()
+            rev_docs = []
+
+            for page_doc in pages:
+                # Detach the revisions from the page so that each revision
+                # can reference the page without a circular structure.
+                page_rev_docs = page_doc.pop('deletedrevisions', [])
+
+                for rev_doc in page_rev_docs:
+                    rev_doc['page'] = page_doc
+
+                rev_docs.extend(page_rev_docs)
+
+            return rev_docs, query_continue
+
+        except KeyError as e:
+            raise MalformedResponse(str(e), doc)
--- a/mediawiki_dump_tools/Mediawiki-Utilities/mw/api/collections/pages.py
+++ b/mediawiki_dump_tools/Mediawiki-Utilities/mw/api/collections/pages.py
@@ -0,0 +1,50 @@
+import logging
+
+from ...util import none_or
+from .collection import Collection
+
+logger = logging.getLogger("mw.api.collections.pages")
+
+
+class Pages(Collection):
+    """
+    Page-level operations.  Only an unfinished parameter builder for
+    `action=edit` exists so far.
+    """
+
+    def _edit(self, title=None, pageid=None, section=None, sectiontitle=None,
+              text=None, token=None, summary=None, minor=None,
+              notminor=None, bot=None, basetimestamp=None,
+              starttimestamp=None, recreate=None, createonly=None,
+              nocreate=None, watch=None, unwatch=None, watchlist=None,
+              md5=None, prependtext=None, appendtext=None, undo=None,
+              undoafter=None, redirect=None, contentformat=None,
+              contentmodel=None, assert_=None, nassert=None,
+              captchaword=None, captchaid=None):
+        """
+        Builds the request parameters for an `action=edit` call.
+
+        Unfinished: the parameters are assembled but no request is sent and
+        nothing is returned.  `redirect`, `contentformat`, `contentmodel`,
+        `assert_`, `nassert`, `captchaword` and `captchaid` are accepted but
+        not used yet.
+        """
+        # Entries are evaluated top to bottom, so conversion errors surface
+        # in the same order as the parameters are listed.
+        params = {
+            'action': "edit",
+            'title': none_or(title, str),
+            'pageid': none_or(pageid, int),
+            'section': none_or(section, int, levels={'new'}),
+            'sectiontitle': none_or(sectiontitle, str),
+            'text': none_or(text, str),
+            'token': none_or(token, str),
+            'summary': none_or(summary, str),
+            'minor': none_or(minor, bool),
+            'notminor': none_or(notminor, bool),
+            'bot': none_or(bot, bool),
+            'basetimestamp': self._check_timestamp(basetimestamp),
+            'starttimestamp': self._check_timestamp(starttimestamp),
+            'recreate': none_or(recreate, bool),
+            'createonly': none_or(createonly, bool),
+            'nocreate': none_or(nocreate, bool),
+            'watch': none_or(watch, bool),
+            'unwatch': none_or(unwatch, bool),
+            'watchlist': none_or(watchlist, bool),
+            'md5': none_or(md5, str),
+            'prependtext': none_or(prependtext, str),
+            'appendtext': none_or(appendtext, str),
+            'undo': none_or(undo, int),
+            'undoafter': none_or(undoafter, int),
+        }
+
+        # TODO finish this
--- a/mediawiki_dump_tools/Mediawiki-Utilities/mw/api/collections/recent_changes.py
+++ b/mediawiki_dump_tools/Mediawiki-Utilities/mw/api/collections/recent_changes.py
@@ -0,0 +1,192 @@
+import logging
+import re
+
+from ...util import none_or
+from ..errors import MalformedResponse
+from .collection import Collection
+
+logger = logging.getLogger("mw.api.collections.recent_changes")
+
+
+class RecentChanges(Collection):
+    """
+    Recent changes (revisions, page creations, registrations, moves, etc.)
+    """
+
+    RCCONTINUE = re.compile(r"([0-9]{4}-[0-9]{2}-[0-9]{2}T" +
+                            r"[0-9]{2}:[0-9]{2}:[0-9]{2}Z|" +
+                            r"[0-9]{14})" +
+                            r"\|[0-9]+")
+
+    # Includes 'parsedcomment' and 'patrolled', which query() documents.
+    PROPERTIES = {'user', 'userid', 'comment', 'parsedcomment', 'timestamp',
+                  'title', 'ids', 'sizes', 'redirect', 'patrolled', 'flags',
+                  'loginfo', 'tags', 'sha1'}
+
+    # Includes 'unpatrolled', which query() documents.
+    SHOW = {'minor', '!minor', 'bot', '!bot', 'anon', '!anon',
+            'redirect', '!redirect', 'patrolled', '!patrolled',
+            'unpatrolled'}
+
+    TYPES = {'edit', 'external', 'new', 'log'}
+
+    DIRECTIONS = {'newer', 'older'}
+
+    MAX_CHANGES = 50
+
+    def _check_rccontinue(self, rccontinue):
+        """
+        Validates a continuation value against :attr:`RCCONTINUE`.
+
+        :Returns:
+            `rccontinue` unchanged (None is passed through)
+
+        :Raises:
+            TypeError : if the value does not match :attr:`RCCONTINUE`
+        """
+        if rccontinue is None:
+            return None
+        elif self.RCCONTINUE.match(rccontinue):
+            return rccontinue
+        else:
+            raise TypeError(
+                "rccontinue {0} is not formatted correctly ".format(rccontinue) +
+                "'%Y-%m-%dT%H:%M:%SZ|<last_rcid>'"
+            )
+
+    def query(self, *args, limit=None, **kwargs):
+        """
+        Enumerate recent changes.
+        See `<https://www.mediawiki.org/wiki/API:Recentchanges>`_
+
+        :Parameters:
+            start : :class:`mw.Timestamp`
+                The timestamp to start enumerating from
+            end : :class:`mw.Timestamp`
+                The timestamp to end enumerating
+            direction :
+                "newer" or "older"
+            namespace : int
+                Filter log entries to only this namespace(s)
+            user : str
+                Only list changes by this user
+            excludeuser : str
+                Don't list changes by this user
+            tag : str
+                Only list changes tagged with this tag
+            properties : set(str)
+                Include additional pieces of information
+
+                * user           - Adds the user responsible for the edit and tags if they are an IP
+                * userid         - Adds the user id responsible for the edit
+                * comment        - Adds the comment for the edit
+                * parsedcomment  - Adds the parsed comment for the edit
+                * flags          - Adds flags for the edit
+                * timestamp      - Adds timestamp of the edit
+                * title          - Adds the page title of the edit
+                * ids            - Adds the page ID, recent changes ID and the new and old revision ID
+                * sizes          - Adds the new and old page length in bytes
+                * redirect       - Tags edit if page is a redirect
+                * patrolled      - Tags patrollable edits as being patrolled or unpatrolled
+                * loginfo        - Adds log information (logid, logtype, etc) to log entries
+                * tags           - Lists tags for the entry
+                * sha1           - Adds the content checksum for entries associated with a revision
+
+            token : set(str)
+                Which tokens to obtain for each change
+
+                * patrol
+
+            show : set(str)
+                Show only items that meet this criteria. For example, to see
+                only minor edits done by logged-in users, set
+                show={'minor', '!anon'}.
+
+                * minor
+                * !minor
+                * bot
+                * !bot
+                * anon
+                * !anon
+                * redirect
+                * !redirect
+                * patrolled
+                * !patrolled
+                * unpatrolled
+            limit : int
+                How many total changes to return
+            type : set(str)
+                Which types of changes to show
+
+                * edit
+                * external
+                * new
+                * log
+
+            toponly : bool
+                Only list changes which are the latest revision
+            rccontinue : str
+                Use this to continue loading results from where you last left off
+
+        :Returns:
+            An iterator of change dicts returned from the API.
+        """
+        limit = none_or(limit, int)
+
+        changes_yielded = 0
+        done = False
+        while not done:
+
+            # Ask for at most MAX_CHANGES per request, and never for more
+            # than is still missing to reach the caller's total `limit`.
+            if limit is None:
+                kwargs['limit'] = self.MAX_CHANGES
+            else:
+                kwargs['limit'] = min(limit - changes_yielded, self.MAX_CHANGES)
+
+            rc_docs, rccontinue = self._query(*args, **kwargs)
+
+            for doc in rc_docs:
+                yield doc
+                changes_yielded += 1
+
+                if limit is not None and changes_yielded >= limit:
+                    done = True
+                    break
+
+            if rccontinue is not None and len(rc_docs) > 0:
+
+                kwargs['rccontinue'] = rccontinue
+            else:
+                done = True
+
+    def _query(self, start=None, end=None, direction=None, namespace=None,
+               user=None, excludeuser=None, tag=None, properties=None,
+               token=None, show=None, limit=None, type=None,
+               toponly=None, rccontinue=None):
+        """
+        Performs a single `list=recentchanges` request.
+
+        :Returns:
+            A pair `(rc_docs, rccontinue)`: the list of change dicts and the
+            value to pass as `rccontinue` for the next request.
+
+        :Raises:
+            AssertionError : if direction, properties, show or type contain
+                             a value that is not allowed
+            TypeError : if `rccontinue` is malformed
+            MalformedResponse : if the response lacks an expected key
+        """
+        params = {
+            'action': "query",
+            'list': "recentchanges"
+        }
+
+        params['rcstart'] = none_or(start, str)
+        params['rcend'] = none_or(end, str)
+
+        # Same validation (and AssertionError) as the other collections.
+        params['rcdir'] = self._check_direction(direction)
+        params['rcnamespace'] = none_or(namespace, int)
+        params['rcuser'] = none_or(user, str)
+        params['rcexcludeuser'] = none_or(excludeuser, str)
+        params['rctag'] = none_or(tag, str)
+        params['rcprop'] = self._items(properties, levels=self.PROPERTIES)
+        # Was built from `tag`, so the `token` argument was silently ignored.
+        params['rctoken'] = self._items(token)
+        params['rcshow'] = self._items(show, levels=self.SHOW)
+        params['rclimit'] = none_or(limit, int)
+        # `levels` must be passed by keyword; positionally TYPES landed in
+        # the `none` parameter and the types were never validated.
+        params['rctype'] = self._items(type, levels=self.TYPES)
+        params['rctoponly'] = none_or(toponly, bool)
+        params['rccontinue'] = self._check_rccontinue(rccontinue)
+
+        doc = self.session.get(params)
+
+        try:
+            rc_docs = doc['query']['recentchanges']
+
+            if 'query-continue' in doc:
+                rccontinue = \
+                        doc['query-continue']['recentchanges']['rccontinue']
+            elif len(rc_docs) > 0:
+                # No continuation in the response: derive one from the last
+                # change (needs 'timestamp' and 'rcid' in the results).
+                rccontinue = "|".join([rc_docs[-1]['timestamp'],
+                                       str(rc_docs[-1]['rcid'] + 1)])
+            else:
+                pass  # Leave it be
+
+        except KeyError as e:
+            raise MalformedResponse(str(e), doc)
+
+        return rc_docs, rccontinue
--- a/mediawiki_dump_tools/Mediawiki-Utilities/mw/api/collections/revisions.py
+++ b/mediawiki_dump_tools/Mediawiki-Utilities/mw/api/collections/revisions.py
@@ -0,0 +1,220 @@
+import logging
+
+from ...util import none_or
+from ..errors import MalformedResponse
+from .collection import Collection
+
+logger = logging.getLogger("mw.api.collections.revisions")
+
+
+class Revisions(Collection):
+    """
+    A collection of revisions indexes by title, page_id and user_text.
+    Note that revisions of deleted pages are queriable via
+    :class:`mw.api.DeletedRevisions`.
+    """
+
+    PROPERTIES = {'ids', 'flags', 'timestamp', 'user', 'userid', 'size',
+                  'sha1', 'contentmodel', 'comment', 'parsedcomment',
+                  'content', 'tags', 'flagged'}
+
+    DIFF_TO = {'prev', 'next', 'cur'}
+
+    # This is *not* the right way to do this, but it should work for all queries.
+    MAX_REVISIONS = 50
+
+    def get(self, rev_id, **kwargs):
+        """
+        Get a single revision based on its ID.  Throws a :py:class:`KeyError`
+        if the rev_id cannot be found.
+
+        :Parameters:
+            rev_id : int
+                Revision ID
+            ``**kwargs``
+                Passed to :py:meth:`query`
+
+        :Returns:
+            A single rev dict
+        """
+        rev_id = int(rev_id)
+
+        revs = list(self.query(revids={rev_id}, **kwargs))
+
+        if len(revs) < 1:
+            raise KeyError(rev_id)
+        else:
+            return revs[0]
+
+    def query(self, *args, limit=None, **kwargs):
+        """
+        Get revision information.
+        See `<https://www.mediawiki.org/wiki/API:Properties#revisions_.2F_rv>`_
+
+        :Parameters:
+            properties : set(str)
+                Which properties to get for each revision:
+
+                * ids            - The ID of the revision
+                * flags          - Revision flags (minor)
+                * timestamp      - The timestamp of the revision
+                * user           - User that made the revision
+                * userid         - User id of revision creator
+                * size           - Length (bytes) of the revision
+                * sha1           - SHA-1 (base 16) of the revision
+                * contentmodel   - Content model id
+                * comment        - Comment by the user for revision
+                * parsedcomment  - Parsed comment by the user for the revision
+                * content        - Text of the revision
+                * tags           - Tags for the revision
+            limit : int
+                Limit how many revisions will be returned
+                No more than 500 (5000 for bots) allowed
+            start_id : int
+                From which revision id to start enumeration (enum)
+            end_id : int
+                Stop revision enumeration on this revid
+            start : :class:`mw.Timestamp`
+                From which revision timestamp to start enumeration (enum)
+            end : :class:`mw.Timestamp`
+                Enumerate up to this timestamp
+            direction : str
+                "newer" or "older"
+            user : str
+                Only include revisions made by user_text
+            excludeuser : str
+                Exclude revisions made by user_text
+            tag : str
+                Only list revisions tagged with this tag
+            expandtemplates : bool
+                Expand templates in revision content (requires "content" property)
+            generatexml : bool
+                Generate XML parse tree for revision content (requires "content" property)
+            parse : bool
+                Parse revision content (requires "content" property)
+            section : int
+                Only retrieve the content of this section number
+            token : set(str)
+                Which tokens to obtain for each revision
+
+                * rollback - See `<https://www.mediawiki.org/wiki/API:Edit_-_Rollback#Token>`_
+            rvcontinue : str
+                When more results are available, use this to continue
+            diffto : int
+                Revision ID to diff each revision to. Use "prev", "next" and
+                "cur" for the previous, next and current revision respectively
+            difftotext : str
+                Text to diff each revision to. Only diffs a limited number of
+                revisions. Overrides diffto. If section is set, only that
+                section will be diffed against this text
+            contentformat : str
+                Serialization format used for difftotext and expected for output of content
+
+                * text/x-wiki
+                * text/javascript
+                * text/css
+                * text/plain
+                * application/json
+
+        :Returns:
+            An iterator of rev dicts returned from the API.
+        """
+
+        revisions_yielded = 0
+        done = False
+        while not done:
+            # Ask for at most MAX_REVISIONS per request, and never for more
+            # than is still missing to reach the caller's total `limit`.
+            if limit is None:
+                kwargs['limit'] = self.MAX_REVISIONS
+            else:
+                kwargs['limit'] = min(limit - revisions_yielded, self.MAX_REVISIONS)
+
+            rev_docs, rvcontinue = self._query(*args, **kwargs)
+
+            for doc in rev_docs:
+                yield doc
+                revisions_yielded += 1
+
+                if limit is not None and revisions_yielded >= limit:
+                    done = True
+                    break
+
+            if rvcontinue is not None and len(rev_docs) > 0:
+                kwargs['rvcontinue'] = rvcontinue
+            else:
+                done = True
+
+    def _query(self, revids=None, titles=None, pageids=None, properties=None,
+                     limit=None, start_id=None, end_id=None, start=None,
+                     end=None, direction=None, user=None, excludeuser=None,
+                     tag=None, expandtemplates=None, generatexml=None,
+                     parse=None, section=None, token=None, rvcontinue=None,
+                     diffto=None, difftotext=None, contentformat=None):
+        """
+        Performs a single `prop=revisions` request.
+
+        :Returns:
+            A pair `(rev_docs, rvcontinue)`.  `rev_docs` is a list of rev
+            dicts, each with its page dict attached under 'page'.
+            `rvcontinue` is the continuation value from the response's
+            'query-continue' section, or None when there is none.
+
+        :Raises:
+            MalformedResponse : if the response lacks an expected key
+        """
+        params = {
+            'action': "query",
+            'prop': "revisions",
+            'rawcontinue': ''
+        }
+
+        params['revids'] = self._items(revids, type=int)
+        params['titles'] = self._items(titles)
+        params['pageids'] = self._items(pageids, type=int)
+
+        params['rvprop'] = self._items(properties, levels=self.PROPERTIES)
+
+        if revids is None:  # Can't have a limit unless revids is none
+            params['rvlimit'] = none_or(limit, int)
+
+        params['rvstartid'] = none_or(start_id, int)
+        params['rvendid'] = none_or(end_id, int)
+        params['rvstart'] = self._check_timestamp(start)
+        params['rvend'] = self._check_timestamp(end)
+
+        params['rvdir'] = self._check_direction(direction)
+        params['rvuser'] = none_or(user, str)
+        # A user name, not a number: casting with int() raised ValueError for
+        # ordinary user names.
+        params['rvexcludeuser'] = none_or(excludeuser, str)
+        params['rvtag'] = none_or(tag, str)
+        params['rvexpandtemplates'] = none_or(expandtemplates, bool)
+        params['rvgeneratexml'] = none_or(generatexml, bool)
+        params['rvparse'] = none_or(parse, bool)
+        params['rvsection'] = none_or(section, int)
+        # NOTE(review): `token` is documented as set(str) but is sent through
+        # str(); left unchanged so callers passing a plain str keep working.
+        params['rvtoken'] = none_or(token, str)
+        params['rvcontinue'] = none_or(rvcontinue, str)
+        params['rvdiffto'] = self._check_diffto(diffto)
+        params['rvdifftotext'] = none_or(difftotext, str)
+        params['rvcontentformat'] = none_or(contentformat, str)
+
+        doc = self.session.get(params)
+
+        try:
+            if 'query-continue' in doc:
+                rvcontinue = doc['query-continue']['revisions']['rvcontinue']
+            else:
+                rvcontinue = None
+
+            pages = doc['query'].get('pages', {}).values()
+            rev_docs = []
+
+            for page_doc in pages:
+                if 'missing' in page_doc or 'revisions' not in page_doc:
+                    continue
+
+                # Detach the revisions from the page so that each revision
+                # can reference the page without a circular structure.
+                page_rev_docs = page_doc['revisions']
+                del page_doc['revisions']
+
+                for rev_doc in page_rev_docs:
+                    rev_doc['page'] = page_doc
+
+                rev_docs.extend(page_rev_docs)
+
+            return rev_docs, rvcontinue
+
+        except KeyError as e:
+            raise MalformedResponse(str(e), doc)
+
+    def _check_diffto(self, diffto):
+        """
+        Passes through None and the names in :attr:`DIFF_TO`; anything else
+        is converted to an int revision ID.
+        """
+        if diffto is None or diffto in self.DIFF_TO:
+            return diffto
+        else:
+            return int(diffto)
--- a/mediawiki_dump_tools/Mediawiki-Utilities/mw/api/collections/site_info.py
+++ b/mediawiki_dump_tools/Mediawiki-Utilities/mw/api/collections/site_info.py
@@ -0,0 +1,81 @@
+import logging
+
+from ..errors import MalformedResponse
+from .collection import Collection
+
+logger = logging.getLogger("mw.api.collections.site_info")
+
+
+class SiteInfo(Collection):
+    """
+    General information about the site.
+    """
+
+    # Includes 'restrictions' and 'defaultoptions', which query() documents.
+    PROPERTIES = {'general', 'namespaces', 'namespacealiases',
+                  'specialpagealiases', 'magicwords', 'interwikimap',
+                  'dbrepllag', 'statistics', 'usergroups', 'extensions',
+                  'fileextensions', 'rightsinfo', 'restrictions',
+                  'languages', 'skins', 'extensiontags', 'functionhooks',
+                  'showhooks', 'variables', 'protocols', 'defaultoptions'}
+
+    # Valid `filteriw` values; query() does not enforce them.
+    FILTERIW = {'local', '!local'}
+
+    def query(self, properties=None, filteriw=None, showalldb=None,
+              numberinggroup=None, inlanguagecode=None):
+        """
+        General information about the site.
+        See `<https://www.mediawiki.org/wiki/API:Meta#siteinfo_.2F_si>`_
+
+        :Parameters:
+            properties : set(str)
+                Which sysinfo properties to get:
+
+                * general               - Overall system information
+                * namespaces            - List of registered namespaces and their canonical names
+                * namespacealiases      - List of registered namespace aliases
+                * specialpagealiases    - List of special page aliases
+                * magicwords            - List of magic words and their aliases
+                * statistics            - Returns site statistics
+                * interwikimap          - Returns interwiki map (optionally filtered, (optionally localised by using siinlanguagecode))
+                * dbrepllag             - Returns database server with the highest replication lag
+                * usergroups            - Returns user groups and the associated permissions
+                * extensions            - Returns extensions installed on the wiki
+                * fileextensions        - Returns list of file extensions allowed to be uploaded
+                * rightsinfo            - Returns wiki rights (license) information if available
+                * restrictions          - Returns information on available restriction (protection) types
+                * languages             - Returns a list of languages MediaWiki supports(optionally localised by using siinlanguagecode)
+                * skins                 - Returns a list of all enabled skins
+                * extensiontags         - Returns a list of parser extension tags
+                * functionhooks         - Returns a list of parser function hooks
+                * showhooks             - Returns a list of all subscribed hooks (contents of $wgHooks)
+                * variables             - Returns a list of variable IDs
+                * protocols             - Returns a list of protocols that are allowed in external links.
+                * defaultoptions        - Returns the default values for user preferences.
+            filteriw : str
+                "local" or "!local" Return only local or only nonlocal entries of the interwiki map
+            showalldb : bool
+                List all database servers, not just the one lagging the most
+            numberinggroup : bool
+                Lists the number of users in user groups
+            inlanguagecode : str
+                Language code for localised language names (best effort, use CLDR extension)
+
+        :Returns:
+            The 'query' section of the API response
+
+        :Raises:
+            AssertionError : if `properties` contains an unknown value
+            MalformedResponse : if the response has no 'query' section
+        """
+
+        siprop = self._items(properties, levels=self.PROPERTIES)
+
+        doc = self.session.get(
+            {
+                'action': "query",
+                'meta': "siteinfo",
+                'siprop': siprop,
+                'sifilteriw': filteriw,
+                'sishowalldb': showalldb,
+                'sinumberinggroup': numberinggroup,
+                'siinlanguagecode': inlanguagecode
+            }
+        )
+
+        try:
+            return doc['query']
+        except KeyError as e:
+            raise MalformedResponse(str(e), doc)
--- a/mediawiki_dump_tools/Mediawiki-Utilities/mw/api/collections/user_contribs.py
+++ b/mediawiki_dump_tools/Mediawiki-Utilities/mw/api/collections/user_contribs.py
@@ -0,0 +1,132 @@
+import logging
+
+from ...util import none_or
+from ..errors import MalformedResponse
+from .collection import Collection
+
+logger = logging.getLogger("mw.api.collections.user_contribs")
+
+
+class UserContribs(Collection):
+    """
+    A collection of revisions indexed by user.
+    """
+
+    PROPERTIES = {'ids', 'title', 'timestamp', 'comment', 'parsedcomment',
+                  'size', 'sizediff', 'flags', 'patrolled', 'tags'}
+
+    SHOW = {'minor', '!minor', 'patrolled', '!patrolled',
+            'top', '!top', 'new', '!new'}
+
+    MAX_REVISIONS = 50
+
+    def query(self, *args, limit=None, **kwargs):
+        """
+        Get a user's revisions.
+        See `<https://www.mediawiki.org/wiki/API:Usercontribs>`_
+
+        :Parameters:
+            limit : int
+                The maximum number of contributions to return.
+            start : :class:`mw.Timestamp`
+                The start timestamp to return from
+            end : :class:`mw.Timestamp`
+                The end timestamp to return to
+            user : set(str)
+                The users to retrieve contributions for.  Maximum number of values 50 (500 for bots)
+            userprefix : set(str)
+                Retrieve contributions for all users whose names begin with this value.
+            direction : str
+                "newer" or "older"
+            namespace : int
+                Only list contributions in these namespaces
+            properties : set(str)
+                Include additional pieces of information
+
+                * ids            - Adds the page ID and revision ID
+                * title          - Adds the title and namespace ID of the page
+                * timestamp      - Adds the timestamp of the edit
+                * comment        - Adds the comment of the edit
+                * parsedcomment  - Adds the parsed comment of the edit
+                * size           - Adds the new size of the edit
+                * sizediff       - Adds the size delta of the edit against its parent
+                * flags          - Adds flags of the edit
+                * patrolled      - Tags patrolled edits
+                * tags           - Lists tags for the edit
+            show : set(str)
+                Show only items that meet these criteria, e.g. non minor edits only: ucshow=!minor.
+                NOTE: If ucshow=patrolled or ucshow=!patrolled is set, revisions older than
+                $wgRCMaxAge (2592000) won't be shown
+
+                * minor
+                * !minor
+                * patrolled
+                * !patrolled
+                * top
+                * !top
+                * new
+                * !new
+            tag : str
+                Only list revisions tagged with this tag
+            toponly : bool
+                DEPRECATED! Only list changes which are the latest revision
+        """
+        limit = none_or(limit, int)
+
+        revisions_yielded = 0
+        done = False
+        while not done:
+            # Ask for at most one batch, and never more than is still owed.
+            if limit is None:
+                kwargs['limit'] = self.MAX_REVISIONS
+            else:
+                kwargs['limit'] = min(limit - revisions_yielded, self.MAX_REVISIONS)
+
+            uc_docs, uccontinue = self._query(*args, **kwargs)
+
+            for doc in uc_docs:
+                yield doc
+                revisions_yielded += 1
+
+                if limit is not None and revisions_yielded >= limit:
+                    done = True
+                    break
+
+            if uccontinue is None or not uc_docs:
+                done = True
+            else:
+                kwargs['uccontinue'] = uccontinue
+
+    def _query(self, user=None, userprefix=None, limit=None, start=None,
+               end=None, direction=None, namespace=None, properties=None,
+               show=None, tag=None, toponly=None,
+               uccontinue=None):
+        # Performs one API request; returns (contribution docs, continuation
+        # parameters or None).  Raises MalformedResponse on an unexpected doc.
+        params = {'action': "query", 'list': "usercontribs"}
+        params['uclimit'] = none_or(limit, int)
+        params['ucstart'] = self._check_timestamp(start)
+        params['ucend'] = self._check_timestamp(end)
+        if uccontinue is not None:
+            params.update(uccontinue)
+        params['ucuser'] = self._items(user, type=str)
+        params['ucuserprefix'] = self._items(userprefix, type=str)
+        params['ucdir'] = self._check_direction(direction)
+        params['ucnamespace'] = none_or(namespace, int)
+        params['ucprop'] = self._items(properties, levels=self.PROPERTIES)
+        params['ucshow'] = self._items(show, levels=self.SHOW)
+        params['uctag'] = none_or(tag, str)
+        if toponly:
+            params['uctoponly'] = True  # only sent when explicitly requested
+
+        doc = self.session.get(params)
+        try:
+            if 'query-continue' in doc:
+                uccontinue = doc['query-continue']['usercontribs']
+            else:
+                uccontinue = None
+            uc_docs = doc['query']['usercontribs']
+        except KeyError as e:
+            raise MalformedResponse(str(e), doc)
+
+        return uc_docs, uccontinue
--- a/mediawiki_dump_tools/Mediawiki-Utilities/mw/api/collections/users.py
+++ b/mediawiki_dump_tools/Mediawiki-Utilities/mw/api/collections/users.py
@@ -0,0 +1,83 @@
+import logging
+
+from ...util import none_or
+from ..errors import MalformedResponse
+from .collection import Collection
+
+logger = logging.getLogger("mw.api.collections.users")
+
+
+class Users(Collection):
+    """
+    A collection of information about users
+    """
+
+    PROPERTIES = {'blockinfo', 'implicitgroups', 'groups', 'rights',
+                  'registration', 'emailable', 'editcount', 'gender'}
+
+    # Not referenced by the methods of this class; left in place as-is.
+    SHOW = {'minor', '!minor', 'patrolled', '!patrolled'}
+    MAX_REVISIONS = 50
+
+    def query(self, *args, **kwargs):
+        """
+        Get a user's metadata.
+        See `<https://www.mediawiki.org/wiki/API:Users>`_
+
+        :Parameters:
+            users : set(str)
+                The usernames of the users to be retrieved.
+
+            properties : set(str)
+                Include additional pieces of information
+
+                blockinfo      - Tags if the user is blocked, by whom, and
+                                 for what reason
+                groups         - Lists all the groups the user(s) belongs to
+                implicitgroups - Lists all the groups a user is automatically
+                                 a member of
+                rights         - Lists all the rights the user(s) has
+                editcount      - Adds the user's edit count
+                registration   - Adds the user's registration timestamp
+                emailable      - Tags if the user can and wants to receive
+                                 email through [[Special:Emailuser]]
+                gender         - Tags the gender of the user. Returns "male",
+                                 "female", or "unknown"
+        """
+        done = False
+        while not done:
+            # Each pass issues one request and yields every doc it returned.
+            us_docs, query_continue = self._query(*args, **kwargs)
+
+            for doc in us_docs:
+                yield doc
+
+            if query_continue is None or not us_docs:
+                done = True
+            else:
+                kwargs['query_continue'] = query_continue
+
+    def _query(self, users, query_continue=None, properties=None):
+        # One API request; returns (user docs, continuation params or None).
+        params = {
+            'action': "query",
+            'list': "users"
+        }
+        params['ususers'] = self._items(users, type=str)
+        params['usprop'] = self._items(properties, levels=self.PROPERTIES)
+        if query_continue is not None:
+            params.update(query_continue)
+
+        doc = self.session.get(params)
+        try:
+            if 'query-continue' in doc:
+                query_continue = doc['query-continue']['users']
+            else:
+                query_continue = None
+
+            us_docs = doc['query']['users']
+
+            return us_docs, query_continue
+
+        except KeyError as e:
+            raise MalformedResponse(str(e), doc)
--- a/mediawiki_dump_tools/Mediawiki-Utilities/mw/api/errors.py
+++ b/mediawiki_dump_tools/Mediawiki-Utilities/mw/api/errors.py
@@ -0,0 +1,48 @@
+class DocError(Exception):
+    """Base error that keeps the API document which triggered it."""
+    def __init__(self, message, doc):
+        self.doc = doc
+        """
+        The document returned by the API that brought about this error.
+        """
+        super().__init__(message)
+
+
+class APIError(DocError):
+    """Raised when the API answers with a document holding an 'error'."""
+    def __init__(self, doc):
+        error = doc.get('error', {})
+        code = error.get('code')
+        # MediaWiki puts the human-readable text under 'info'; fall back to it.
+        message = error.get('message', error.get('info'))
+        super().__init__("{0}:{1}".format(code, message), doc)
+
+        self.code = code
+        """
+        The error code returned by the api -- if available.
+        """
+        self.message = message
+        """
+        The error message returned by the api -- if available.
+        """
+
+class AuthenticationError(DocError):
+    """Raised when a login attempt ends with a non-success result."""
+    def __init__(self, doc):
+        # A document lacking doc['login']['result'] raises KeyError here.
+        self.result = doc['login']['result']
+        """
+        The result code of an authentication attempt.
+        """
+        super().__init__(self.result, doc)
+
+
+class MalformedResponse(DocError):
+    """Raised when an API document lacks a key the caller relied on."""
+    def __init__(self, key, doc):
+        self.key = key
+        """
+        The expected, but missing key from the API call.
+        """
+        description = "Expected to find '{0}' in result.".format(key)
+        super().__init__(description, doc)
--- a/mediawiki_dump_tools/Mediawiki-Utilities/mw/api/session.py
+++ b/mediawiki_dump_tools/Mediawiki-Utilities/mw/api/session.py
@@ -0,0 +1,134 @@
+import logging
+
+from ..util import api
+from .collections import (DeletedRevisions, Pages, RecentChanges, Revisions,
+                          SiteInfo, UserContribs, Users)
+from .errors import APIError, AuthenticationError, MalformedResponse
+
+logger = logging.getLogger("mw.api.session")
+
+DEFAULT_USER_AGENT = "MediaWiki-Utilities"
+"""
+The default User-Agent to be sent with requests to the API.
+"""
+
+class Session(api.Session):
+    """
+    Represents a connection to a MediaWiki API.
+
+    Cookies and other session information is preserved.
+
+    :Parameters:
+        uri : str
+            The base URI for the API to use.  Usually ends in "api.php"
+        user_agent : str
+            The User-Agent to be sent with requests.  Will raise a warning if
+            left to default value.
+    """
+
+    def __init__(self, uri, *args, user_agent=DEFAULT_USER_AGENT, **kwargs):
+        """
+        Constructs a new :class:`Session`.
+        """
+
+        if user_agent == DEFAULT_USER_AGENT:
+            logger.warning("Sending requests with default User-Agent.  "  +
+                           "Set 'user_agent' on api.Session to quiet this " +
+                           "message.")
+
+        # Copy any caller-supplied headers so their dict is not modified.
+        headers = dict(kwargs.get('headers') or {})
+        headers['User-Agent'] = str(user_agent)
+        kwargs['headers'] = headers
+
+        super().__init__(uri, *args, **kwargs)
+
+        self.pages = Pages(self)
+        """
+        An instance of :class:`mw.api.Pages`.
+        """
+
+        self.revisions = Revisions(self)
+        """
+        An instance of :class:`mw.api.Revisions`.
+        """
+
+        self.recent_changes = RecentChanges(self)
+        """
+        An instance of :class:`mw.api.RecentChanges`.
+        """
+
+        self.site_info = SiteInfo(self)
+        """
+        An instance of :class:`mw.api.SiteInfo`.
+        """
+
+        self.user_contribs = UserContribs(self)
+        """
+        An instance of :class:`mw.api.UserContribs`.
+        """
+
+        self.users = Users(self)
+        """
+        An instance of :class:`mw.api.Users`.
+        """
+
+        self.deleted_revisions = DeletedRevisions(self)
+        """
+        An instance of :class:`mw.api.DeletedRevisions`.
+        """
+
+    def login(self, username, password, token=None):
+        """
+        Performs a login operation.  This method usually makes two requests to
+        API -- one to get a token and one to use the token to log in.  If
+        authentication fails, this method will throw an
+        :class:`.errors.AuthenticationError`.
+
+        :Parameters:
+            username : str
+                Your username
+            password : str
+                Your password
+            token : str
+                A login token from an earlier "NeedToken" response.  Callers
+                normally leave this unset; it is requested automatically.
+
+        :Returns:
+            The response in a json :py:class:`dict`
+        """
+        doc = self.post({
+            'action': "login",
+            'lgname': username,
+            'lgpassword': password,
+            'lgtoken': token,  # If None, we'll be getting a token
+        })
+
+        try:
+            result = doc['login']['result']
+            if result == "Success":
+                return doc
+            elif result == "NeedToken" and token is None:
+                # First round trip: retry once with the token we were handed.
+                return self.login(username, password,
+                                  token=doc['login']['token'])
+            else:
+                # A rejected login, or "NeedToken" again after sending one.
+                raise AuthenticationError(doc)
+        except KeyError as e:
+            # KeyError has no `.message` attribute in Python 3; use str(e).
+            raise MalformedResponse(str(e), doc)
+
+    def request(self, type, params, **kwargs):
+        """
+        Sends a request with ``format=json`` and returns the decoded
+        document.  Raises :class:`.errors.APIError` if it holds an 'error'.
+        """
+        # Build a new dict so the caller's `params` is left untouched.
+        params = dict(params, format="json")
+        doc = super().request(type, params, **kwargs).json()
+
+        if 'error' in doc:
+            raise APIError(doc)
+
+        return doc