# Processes data from the Wikia API to identify bots and admins
# Copyright (C) 2018 Nathan TeBlunthuis

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

library(devtools)
|
|
|
|
load_all("RCommunityData")
|
|
|
|
# Get files for a wiki
#
# Reads the rights log and the current user list for one wiki and passes
# both to generate.admin.addrm().
#
# Args:
#   filename: name of the wiki's TSV file (e.g. "mywiki.tsv"). The same name
#     is read from "logevents-2017/" and from "userlist-2017/", both relative
#     to the current working directory.
#
# Returns:
#   The result of generate.admin.addrm() with an added `wiki` column holding
#   `filename` stripped of its ".tsv" suffix.
load.rights.files <- function (filename) {
  wiki <- gsub('\\.tsv$', '', filename)
  print(wiki)  # shows which wiki is currently being loaded

  # quote = "" disables quote processing, so quote characters in the log
  # are read literally.
  logevents <- read.delim(file.path("logevents-2017", filename),
                          stringsAsFactors = FALSE, encoding = "UTF-8",
                          quote = "")

  # na.strings is spelled out in full (the previous `na.string` only worked
  # through partial argument matching); empty fields are read as NA.
  current.userroles <- read.delim(file.path("userlist-2017", filename),
                                  stringsAsFactors = FALSE, na.strings = "",
                                  encoding = "UTF-8", header = TRUE)

  d <- generate.admin.addrm(logevents, current.userroles)
  d$wiki <- wiki
  d
}
|
|
|
|
# Load every wiki named in wiki.list from inside the data directory and
# stack the per-wiki results into a single table.
setwd("userroles_data/")
wiki.files <- paste0(wiki.list$wiki, ".tsv")
userroles <- rbindlist(lapply(wiki.files, load.rights.files))

# Flag roles that start with the blocked-user <span> tag, then strip that
# opening tag and a trailing </span> from the role text. Both new columns
# are computed from the original `role` values.
userroles[, ":="(
  blocked = grepl('^<span class="listusers_blockeduser">(.*?)$', role),
  role = gsub('^(.*?)</span>$', '\\1',
              gsub('^<span class="listusers_blockeduser">(.*?)$', '\\1', role))
)]
|
|
|
|
# Flag rows whose role is one of the listed admin roles or bot roles.
userroles[, ":="(
  is.action.admin = role %in% c("sysop", "bureaucrat", "sysop,bureaucrat",
                                "staff", "admin", "fanonadmin", "steward"),
  is.action.bot = role %in% c("bot", "fyzbot", "bot-global")
)]

# Split the flagged rows into their own tables, each sorted by timestamp.
admins <- userroles[is.action.admin == TRUE]
bots <- userroles[is.action.bot == TRUE]

setorder(admins, "timestamp")
setorder(bots, "timestamp")
|
|
## We want to keep track of when the roles changed.
## Assume nobody was a bot or admin at the beginning of MediaWiki.

## Disabled, unfinished draft kept from the original script:
## userroles[,':='(
## prev.isbot = ifelse(is.na(prev.isbot),(isbot & action=="removed"),prev.isbot)

## Each row opens a period at its own timestamp. The period ends at the
## timestamp of the following row in the same (wiki, user) group, or at
## 2017-01-01 when there is no following row.
## NOTE(review): as.POSIXct() without a tz argument uses the session time
## zone -- confirm this matches the time zone of `timestamp`.
bots[, c("role.period.begin", "role.period.end") := list(
  timestamp,
  shift(timestamp, type = "lead", fill = as.POSIXct("2017-01-01"))
), by = .(wiki, user)]

## TRUE on rows whose action is "added".
bots[, is.bot := action == "added"]

admins[, c("role.period.begin", "role.period.end") := list(
  timestamp,
  shift(timestamp, type = "lead", fill = as.POSIXct("2017-01-01"))
), by = .(wiki, user)]

## TRUE on rows whose action is "added".
admins[, is.admin := action == "added"]
|
|
|
|
# Step back out of the data directory, drop the helper function and the
# intermediate objects, then save data to output files for knitr.
setwd("..")
rm(load.rights.files, wiki.files, userroles)

# NOTE(review): `nosave` and `r` are not defined in this file; presumably
# they are provided by the calling script or by RCommunityData -- confirm.
if (!nosave) {
  saveRDS(bots, file = "bots.RDS")
  saveRDS(admins, file = "admins.RDS")
  saveRDS(r, file = "lib-01-generate_userroles.RDS")
}
|