Initial commit
p# new file: runwikiq.sh
This commit is contained in:
85
lib-01-generate_userroles.R
Normal file
85
lib-01-generate_userroles.R
Normal file
@@ -0,0 +1,85 @@
|
||||
# Processes data from the Wikia API to identify bots and admins
|
||||
# Copyright (C) 2018 Nathan TeBlunthuis
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
library(devtools)
|
||||
|
||||
load_all("RCommunityData")
|
||||
|
||||
# Get files for a wiki
|
||||
# Load the log-event and current user-list files for one wiki and derive
# its role add/remove table.
#
# filename: name of the wiki's TSV file. The same file name is read from
#           both logevents-2017/ and userlist-2017/, resolved relative to
#           the current working directory.
# Returns the result of generate.admin.addrm() with an extra `wiki` column
# holding the file name minus its ".tsv" suffix.
load.rights.files <- function (filename) {
  wiki <- gsub('\\.tsv$', '', filename)
  # Echo the wiki name so progress is visible while many files are loaded.
  print(wiki)

  # quote="" disables quote processing, so quote characters in the log
  # events are read as ordinary text.
  logevents <- read.delim(paste0("logevents-2017/", filename),
                          stringsAsFactors=FALSE, encoding="UTF-8", quote="")

  # na.strings is spelled out in full: the abbreviated `na.string` only
  # worked through partial argument matching inside read.table().
  # Empty fields in the user list are read as NA.
  current.userroles <- read.delim(paste0("userlist-2017/", filename),
                                  stringsAsFactors=FALSE, na.strings="",
                                  encoding="UTF-8", header=TRUE)

  # generate.admin.addrm() is not defined in this file.
  # NOTE(review): presumably provided by the RCommunityData package loaded
  # at the top of the script -- confirm.
  d <- generate.admin.addrm(logevents, current.userroles)
  d$wiki <- wiki
  return(d)
}
|
||||
|
||||
# Read every wiki's role table and stack them into one table.
# load.rights.files() uses paths relative to the working directory, so
# move into userroles_data/ before calling it.
setwd("userroles_data/")

# One "<wiki>.tsv" file name per entry of wiki.list$wiki.
# NOTE(review): wiki.list is not defined in this file; presumably it comes
# from the RCommunityData package loaded above -- confirm.
wiki.files <- paste0(wiki.list$wiki, ".tsv")

userroles <- rbindlist(
  lapply(wiki.files, load.rights.files)
)
|
||||
# Some role values are wrapped in a
# <span class="listusers_blockeduser">...</span> element. Record which
# rows carry the opening tag in `blocked` before the markup is removed.
userroles$blocked <- grepl(
  '^<span class="listusers_blockeduser">(.*?)$',
  userroles$role
)

# Strip the markup: the inner call removes the leading opening tag, the
# outer call removes a trailing </span>.
userroles$role <- gsub(
  '^(.*?)</span>$', '\\1',
  gsub('^<span class="listusers_blockeduser">(.*?)$', '\\1', userroles$role)
)
|
||||
|
||||
# Mark rows whose role is one of the values treated as admin-level here.
userroles[, is.action.admin := role %in% c(
  "sysop", "bureaucrat", "sysop,bureaucrat", "staff",
  "admin", "fanonadmin", "steward"
)]

# Mark rows whose role is one of the values treated as a bot role here.
userroles[, is.action.bot := role %in% c("bot", "fyzbot", "bot-global")]
|
||||
|
||||
# Split the flagged rows into one table per kind of role and sort each
# table by timestamp (setorder sorts in place, ascending by default).
bots <- userroles[is.action.bot == TRUE]
setorder(bots, timestamp)

admins <- userroles[is.action.admin == TRUE]
setorder(admins, timestamp)
|
||||
## we want to keep track of when the roles changed
|
||||
## assume nobody was a bot or admin at the beginning of MediaWiki
|
||||
|
||||
## userroles[,':='(
|
||||
## prev.isbot = ifelse(is.na(prev.isbot),(isbot & action=="removed"),prev.isbot)
|
||||
|
||||
# Within each (wiki, user) group, a row's period begins at its own
# timestamp and ends at the timestamp of the next row in the group; the
# last row of a group ends at the fill value 2017-01-01. This relies on
# bots already being sorted by timestamp.
# NOTE(review): as.POSIXct("2017-01-01") carries no explicit tz, so it is
# interpreted in the session time zone -- confirm this matches timestamp.
bots[,
     c("role.period.begin", "role.period.end") := list(
       timestamp,
       shift(timestamp, n = 1L, fill = as.POSIXct("2017-01-01"), type = "lead")
     ),
     by = c("wiki", "user")]

# TRUE on rows whose action is "added", FALSE for any other action.
bots[, is.bot := action == "added"]
|
||||
|
||||
# Within each (wiki, user) group, a row's period begins at its own
# timestamp and ends at the timestamp of the next row in the group; the
# last row of a group ends at the fill value 2017-01-01. This relies on
# admins already being sorted by timestamp.
# NOTE(review): as.POSIXct("2017-01-01") carries no explicit tz, so it is
# interpreted in the session time zone -- confirm this matches timestamp.
admins[,
       c("role.period.begin", "role.period.end") := list(
         timestamp,
         shift(timestamp, n = 1L, fill = as.POSIXct("2017-01-01"), type = "lead")
       ),
       by = c("wiki", "user")]

# TRUE on rows whose action is "added", FALSE for any other action.
admins[, is.admin := action == "added"]
|
||||
|
||||
# Step back out of the data directory and drop the intermediates that are
# no longer needed, then save the results to output files for knitr.
setwd("..")
rm(load.rights.files, wiki.files, userroles)

# NOTE(review): neither `nosave` nor `r` is defined in this file; both are
# presumably set up by whatever sources this script -- confirm, otherwise
# this block fails with "object not found".
if (!nosave) {
  saveRDS(bots, file = "bots.RDS")
  saveRDS(admins, file = "admins.RDS")
  saveRDS(r, file = "lib-01-generate_userroles.RDS")
}
|
||||
Reference in New Issue
Block a user