Initial commit
p# new file: runwikiq.sh
This commit is contained in:
@@ -0,0 +1,37 @@
|
||||
"""
|
||||
Prints the rev_id, characters and hash of all deleted revisions to Willy_on_Wheels.
|
||||
"""
|
||||
import getpass
|
||||
import hashlib
|
||||
import os
|
||||
import sys
|
||||
|
||||
try:
|
||||
sys.path.insert(0, os.path.abspath(os.getcwd()))
|
||||
|
||||
from mw import api
|
||||
except: raise
|
||||
|
||||
|
||||
|
||||
api_session = api.Session("https://en.wikipedia.org/w/api.php")
|
||||
|
||||
print("(EN) Wikipedia credentials...")
|
||||
username = input("Username: ")
|
||||
password = getpass.getpass("Password: ")
|
||||
api_session.login(username, password)
|
||||
|
||||
revisions = api_session.deleted_revisions.query(
|
||||
properties={'ids', 'content'},
|
||||
titles={'Willy on Wheels'},
|
||||
direction="newer"
|
||||
)
|
||||
|
||||
for rev in revisions:
|
||||
print(
|
||||
"{0} ({1} chars): {2}".format(
|
||||
rev['revid'],
|
||||
len(rev.get('*', "")),
|
||||
hashlib.sha1(bytes(rev.get('*', ""), 'utf8')).hexdigest()
|
||||
)
|
||||
)
|
||||
19
mediawiki_dump_tools/Mediawiki-Utilities/examples/api.py
Normal file
19
mediawiki_dump_tools/Mediawiki-Utilities/examples/api.py
Normal file
@@ -0,0 +1,19 @@
|
||||
"""
|
||||
Prints the rev_id of all revisions to User:TestAccountForMWUtils.
|
||||
"""
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.abspath(os.getcwd()))
|
||||
|
||||
from mw import api
|
||||
|
||||
api_session = api.Session("https://en.wikipedia.org/w/api.php")
|
||||
|
||||
revisions = api_session.revisions.query(
|
||||
properties={'ids'},
|
||||
titles={'User:TestAccountForMWUtils'}
|
||||
)
|
||||
|
||||
for rev in revisions:
|
||||
print(rev['revid'])
|
||||
@@ -0,0 +1,30 @@
|
||||
"""
|
||||
Prints the rc_id, type, timestamp and hash of the 10 oldest edits in recent_changes.
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
|
||||
try:
|
||||
sys.path.insert(0, os.path.abspath(os.getcwd()))
|
||||
from mw import api
|
||||
except:
|
||||
raise
|
||||
|
||||
api_session = api.Session("https://en.wikipedia.org/w/api.php")
|
||||
|
||||
changes = api_session.recent_changes.query(
|
||||
type={'edit', 'new'},
|
||||
properties={'ids', 'sha1', 'timestamp'},
|
||||
direction="newer",
|
||||
limit=10
|
||||
)
|
||||
|
||||
for change in changes:
|
||||
print(
|
||||
"{0} ({1}) @ {2}: {3}".format(
|
||||
change['rcid'],
|
||||
change['type'],
|
||||
change['timestamp'],
|
||||
change.get('sha1', "")
|
||||
)
|
||||
)
|
||||
@@ -0,0 +1,28 @@
|
||||
"""
|
||||
Prints the rev_id, characters and hash of all revisions to User:EpochFail.
|
||||
"""
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.abspath(os.getcwd()))
|
||||
|
||||
import hashlib
|
||||
from mw import api
|
||||
|
||||
api_session = api.Session("https://en.wikipedia.org/w/api.php")
|
||||
|
||||
revisions = api_session.revisions.query(
|
||||
properties={'ids', 'content'},
|
||||
titles={"User:EpochFail"},
|
||||
direction="newer",
|
||||
limit=51
|
||||
)
|
||||
|
||||
for rev in revisions:
|
||||
print(
|
||||
"{0} ({1} chars): {2}".format(
|
||||
rev['revid'],
|
||||
len(rev.get('*', "")),
|
||||
hashlib.sha1(bytes(rev.get('*', ""), 'utf8')).hexdigest()
|
||||
)
|
||||
)
|
||||
@@ -0,0 +1,20 @@
|
||||
"""
|
||||
Prints the user documents for the users EpochFail and Halfak (WMF).
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
|
||||
try:
|
||||
sys.path.insert(0, os.path.abspath(os.getcwd()))
|
||||
from mw import api
|
||||
except:
|
||||
raise
|
||||
|
||||
api_session = api.Session("https://en.wikipedia.org/w/api.php")
|
||||
|
||||
user_docs = api_session.users.query(
|
||||
users=["EpochFail", "Halfak (WMF)"]
|
||||
)
|
||||
|
||||
for user_doc in user_docs:
|
||||
print(user_doc)
|
||||
@@ -0,0 +1,31 @@
|
||||
"""
|
||||
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
|
||||
try:
|
||||
|
||||
sys.path.insert(0, os.path.abspath(os.getcwd()))
|
||||
from mw import database
|
||||
|
||||
except:
|
||||
raise
|
||||
|
||||
|
||||
|
||||
db = database.DB.from_params(
|
||||
host="analytics-store.eqiad.wmnet",
|
||||
read_default_file="~/.my.cnf",
|
||||
user="research",
|
||||
db="enwiki"
|
||||
)
|
||||
|
||||
users = db.users.query(
|
||||
registered_after="20140101000000",
|
||||
direction="newer",
|
||||
limit=10
|
||||
)
|
||||
|
||||
for user in users:
|
||||
print("{user_id}:{user_name} -- {user_editcount} edits".format(**user))
|
||||
59
mediawiki_dump_tools/Mediawiki-Utilities/examples/dump.xml
Normal file
59
mediawiki_dump_tools/Mediawiki-Utilities/examples/dump.xml
Normal file
@@ -0,0 +1,59 @@
|
||||
<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.8/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="//www.mediawiki.org/xml/export-0.8/ http://www.mediawiki.org/xml/export-0.8.xsd"
|
||||
version="0.8" xml:lang="en">
|
||||
<siteinfo>
|
||||
<sitename>Wikipedia</sitename>
|
||||
<base>http://en.wikipedia.org/wiki/Main_Page</base>
|
||||
<generator>MediaWiki 1.22wmf2</generator>
|
||||
<case>first-letter</case>
|
||||
<namespaces>
|
||||
<namespace key="0" case="first-letter" />
|
||||
<namespace key="1" case="first-letter">Talk</namespace>
|
||||
</namespaces>
|
||||
</siteinfo>
|
||||
<page>
|
||||
<title>Foo</title>
|
||||
<ns>0</ns>
|
||||
<id>1</id>
|
||||
<revision>
|
||||
<id>1</id>
|
||||
<timestamp>2004-08-09T09:04:08Z</timestamp>
|
||||
<contributor>
|
||||
<username>Gen0cide</username>
|
||||
<id>92182</id>
|
||||
</contributor>
|
||||
<text xml:space="preserve">Revision 1 text</text>
|
||||
<sha1>g9chqqg94myzq11c56ixvq7o1yg75n9</sha1>
|
||||
<model>wikitext</model>
|
||||
<format>text/x-wiki</format>
|
||||
</revision>
|
||||
<revision>
|
||||
<id>2</id>
|
||||
<timestamp>2004-08-10T09:04:08Z</timestamp>
|
||||
<contributor>
|
||||
<ip>222.152.210.109</ip>
|
||||
</contributor>
|
||||
<text xml:space="preserve">Revision 2 text</text>
|
||||
<sha1>g9chqqg94myzq11c56ixvq7o1yg75n9</sha1>
|
||||
<model>wikitext</model>
|
||||
<comment>Comment 2</comment>
|
||||
<format>text/x-wiki</format>
|
||||
</revision>
|
||||
</page>
|
||||
<page>
|
||||
<title>Bar</title>
|
||||
<ns>1</ns>
|
||||
<id>2</id>
|
||||
<revision>
|
||||
<id>3</id>
|
||||
<timestamp>2004-08-11T09:04:08Z</timestamp>
|
||||
<contributor>
|
||||
<ip>222.152.210.22</ip>
|
||||
</contributor>
|
||||
<text xml:space="preserve">Revision 3 text</text>
|
||||
<sha1>g9chqqg94myzq11c56ixvq7o1yg75n9</sha1>
|
||||
<model>wikitext</model>
|
||||
<format>text/x-wiki</format>
|
||||
</revision>
|
||||
</page>
|
||||
</mediawiki>
|
||||
31
mediawiki_dump_tools/Mediawiki-Utilities/examples/dump2.xml
Normal file
31
mediawiki_dump_tools/Mediawiki-Utilities/examples/dump2.xml
Normal file
@@ -0,0 +1,31 @@
|
||||
<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.8/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="//www.mediawiki.org/xml/export-0.8/ http://www.mediawiki.org/xml/export-0.8.xsd"
|
||||
version="0.8" xml:lang="en">
|
||||
<siteinfo>
|
||||
<sitename>Wikipedia</sitename>
|
||||
<base>http://en.wikipedia.org/wiki/Main_Page</base>
|
||||
<generator>MediaWiki 1.22wmf2</generator>
|
||||
<case>first-letter</case>
|
||||
<namespaces>
|
||||
<namespace key="0" case="first-letter" />
|
||||
<namespace key="1" case="first-letter">Talk</namespace>
|
||||
</namespaces>
|
||||
</siteinfo>
|
||||
<page>
|
||||
<title>Herp</title>
|
||||
<ns>1</ns>
|
||||
<id>2</id>
|
||||
<revision>
|
||||
<id>4</id>
|
||||
<timestamp>2004-08-11T09:04:08Z</timestamp>
|
||||
<contributor>
|
||||
<id>10</id>
|
||||
<name>FOobar!?</name>
|
||||
</contributor>
|
||||
<text xml:space="preserve">Revision 4 text</text>
|
||||
<sha1>g9chqqg94myzq11c56ixvq7o1yg75n9</sha1>
|
||||
<model>wikitext</model>
|
||||
<format>text/x-wiki</format>
|
||||
</revision>
|
||||
</page>
|
||||
</mediawiki>
|
||||
@@ -0,0 +1,19 @@
|
||||
import pprint
|
||||
import re
|
||||
|
||||
from mw.api import Session
|
||||
from mw.lib import persistence
|
||||
|
||||
session = Session("https://en.wikipedia.org/w/api.php")
|
||||
|
||||
rev, tokens_added, future_revs = persistence.api.score(session, 560561013,
|
||||
properties={'user'})
|
||||
|
||||
# Matches any run of word characters. A raw string is used so that "\w" is
# passed to the regex engine verbatim instead of being parsed as an (invalid)
# string escape, which newer Python versions warn about.
words_re = re.compile(r"\w+", re.UNICODE)
|
||||
|
||||
print("Words added")
|
||||
for token in tokens_added:
|
||||
if words_re.search(token.text):
|
||||
print("'{0}' survived:".format(token.text))
|
||||
for frev in token.revisions:
|
||||
print("\t{revid} by {user}".format(**frev))
|
||||
@@ -0,0 +1,18 @@
|
||||
"""
|
||||
Prints the reverting rev_id, rev_id and reverted to rev_id of all reverted
|
||||
revisions made by user "PermaNoob".
|
||||
"""
|
||||
from mw.api import Session
|
||||
from mw.lib import reverts
|
||||
|
||||
session = Session("https://en.wikipedia.org/w/api.php")
|
||||
revisions = session.user_contribs.query(user={"PermaNoob"}, direction="newer")
|
||||
|
||||
for rev in revisions:
|
||||
revert = reverts.api.check_rev(session, rev, window=60*60*24*2)
|
||||
if revert is not None:
|
||||
print("{0} reverted {1} to {2}".format(
|
||||
revert.reverting['revid'],
|
||||
rev['revid'],
|
||||
revert.reverted_to['revid'])
|
||||
)
|
||||
@@ -0,0 +1,23 @@
|
||||
"""
|
||||
Prints the reverting rev_id, rev_id and reverted to rev_id of all reverted
|
||||
revisions made by user with ID 9133062.
|
||||
"""
|
||||
from mw.database import DB
|
||||
from mw.lib import reverts
|
||||
|
||||
db = DB.from_params(
|
||||
host="s1-analytics-slave.eqiad.wmnet",
|
||||
read_default_file="~/.my.cnf",
|
||||
user="research",
|
||||
db="enwiki"
|
||||
)
|
||||
revisions = db.revisions.query(user_id=9133062)
|
||||
|
||||
for rev_row in revisions:
|
||||
revert = reverts.database.check_row(db, rev_row)
|
||||
if revert is not None:
|
||||
print("{0} reverted {1} to {2}".format(
|
||||
revert.reverting['rev_id'],
|
||||
rev_row['rev_id'],
|
||||
revert.reverted_to['rev_id'])
|
||||
)
|
||||
@@ -0,0 +1,21 @@
|
||||
"""
|
||||
Prints all reverted revisions of User:EpochFail.
|
||||
"""
|
||||
from mw.api import Session
|
||||
from mw.lib import reverts
|
||||
|
||||
# Gather a page's revisions from the API
|
||||
api_session = Session("https://en.wikipedia.org/w/api.php")
|
||||
revs = api_session.revisions.query(
|
||||
titles={"User:EpochFail"},
|
||||
properties={'ids', 'sha1'},
|
||||
direction="newer"
|
||||
)
|
||||
|
||||
# Creates a revision event iterator
|
||||
rev_events = ((rev['sha1'], rev) for rev in revs)
|
||||
|
||||
# Detect and print reverts
|
||||
for revert in reverts.detect(rev_events):
|
||||
print("{0} reverted back to {1}".format(revert.reverting['revid'],
|
||||
revert.reverted_to['revid']))
|
||||
@@ -0,0 +1,17 @@
|
||||
"""
|
||||
Prints out session information for user "TestAccountForMWUtils"
|
||||
"""
|
||||
from mw.api import Session
|
||||
from mw.lib import sessions
|
||||
|
||||
# Gather a user's revisions from the API
|
||||
api_session = Session("https://en.wikipedia.org/w/api.php")
|
||||
revs = api_session.user_contribs.query(
|
||||
user={"TestAccountForMWUtils"},
|
||||
direction="newer"
|
||||
)
|
||||
rev_events = ((rev['user'], rev['timestamp'], rev) for rev in revs)
|
||||
|
||||
# Extract and print sessions
|
||||
for user, session in sessions.cluster(rev_events):
|
||||
print("{0}'s session with {1} revisions".format(user, len(session)))
|
||||
@@ -0,0 +1,26 @@
|
||||
"""
|
||||
Demonstrates title normalization and parsing.
|
||||
"""
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.abspath(os.getcwd()))
|
||||
|
||||
from mw.api import Session
|
||||
from mw.lib import title
|
||||
|
||||
# Normalize titles
|
||||
title.normalize("foo bar")
|
||||
# > "Foo_bar"
|
||||
|
||||
# Construct a title parser from the API
|
||||
api_session = Session("https://en.wikipedia.org/w/api.php")
|
||||
parser = title.Parser.from_api(api_session)
|
||||
|
||||
# Handles normalization
|
||||
parser.parse("user:epochFail")
|
||||
# > 2, "EpochFail"
|
||||
|
||||
# Handles namespace aliases
|
||||
parser.parse("WT:foobar")
|
||||
# > 5, "Foobar"
|
||||
@@ -0,0 +1,27 @@
|
||||
"""
|
||||
Demonstrates some simple Timestamp operations
|
||||
"""
|
||||
from mw import Timestamp
|
||||
|
||||
# Seconds since Unix Epoch
|
||||
str(Timestamp(1234567890))
|
||||
# > '20090213233130'
|
||||
|
||||
# Database format
|
||||
int(Timestamp("20090213233130"))
|
||||
# > 1234567890
|
||||
|
||||
# API format
|
||||
int(Timestamp("2009-02-13T23:31:30Z"))
|
||||
# > 1234567890
|
||||
|
||||
# Difference in seconds
|
||||
Timestamp("2009-02-13T23:31:31Z") - Timestamp(1234567890)
|
||||
# > 1
|
||||
|
||||
# strptime and strftime
|
||||
Timestamp(1234567890).strftime("%Y foobar")
|
||||
# > '2009 foobar'
|
||||
|
||||
str(Timestamp.strptime("2009 derp 10", "%Y derp %m"))
|
||||
# > '20091001000000'
|
||||
@@ -0,0 +1,14 @@
|
||||
"""
|
||||
Prints out all rev_ids that appear in dump.xml.
|
||||
"""
|
||||
from mw.xml_dump import Iterator
|
||||
|
||||
# Construct dump file iterator. The file is opened in a context manager so the
# handle is closed once iteration finishes (or if an error interrupts it); the
# loops stay inside the `with` block because the iterator reads from the open
# file while it is being consumed.
with open("examples/dump.xml") as dump_file:
    dump = Iterator.from_file(dump_file)

    # Iterate through pages
    for page in dump:

        # Iterate through a page's revisions
        for revision in page:
            print(revision.id)
|
||||
@@ -0,0 +1,15 @@
|
||||
"""
|
||||
Processes two dump files.
|
||||
"""
|
||||
from mw import xml_dump
|
||||
|
||||
files = ["examples/dump.xml", "examples/dump2.xml"]
|
||||
|
||||
|
||||
def page_info(dump, path):
|
||||
for page in dump:
|
||||
yield page.id, page.namespace, page.title
|
||||
|
||||
|
||||
for page_id, page_namespace, page_title in xml_dump.map(files, page_info):
|
||||
print("\t".join([str(page_id), str(page_namespace), page_title]))
|
||||
Reference in New Issue
Block a user