Initial commit
p# new file: runwikiq.sh
This commit is contained in:
77
01_build_datasets.R
Executable file
77
01_build_datasets.R
Executable file
@@ -0,0 +1,77 @@
|
||||
#!/usr/bin/env Rscript
|
||||
# Top level script for building datasets.
|
||||
# Copyright (C) 2018 Nathan TeBlunthuis
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
library(data.table)
|
||||
library(parallel)
|
||||
|
||||
## Plot the distribution of one column of a dataset.
##
## Logical columns are drawn with a plain qplot(x); every other column gets a
## 100-bin, density-scaled histogram overlaid with an empirical density line.
## The ggplot2 functions used here (qplot, aes, geom_*, ...) must already be
## attached by the caller.
##
## Arguments:
##   data    - object indexable with [[ ]] (a list, data.frame, data.table).
##   varname - name of the column to plot; also used in the output file name.
##   save    - if TRUE the plot is written to
##             "plots/<varname>.distribution.pdf", otherwise it is drawn on
##             the current graphics device.
##
## Returns the plot object, invisibly.
plot.distribution <- function(data, varname, save = TRUE) {
  x <- data[[varname]]
  print(paste("plotting distribution for", varname))

  if (save) {
    ## pdf() cannot open a file inside a missing directory.
    dir.create("plots", showWarnings = FALSE, recursive = TRUE)
    pdf(paste0("plots/", varname, ".distribution.pdf"))
    ## Close the device even when building or printing the plot fails.
    on.exit(dev.off(), add = TRUE)
  }

  ## inherits() always yields a single TRUE/FALSE. Comparing class(x) with a
  ## string yields one value per class, which breaks if() for columns that
  ## carry several classes.
  if (inherits(x, "logical")) {
    p0 <- qplot(x)
  } else {
    ## overlay histogram and empirical density
    p0 <- qplot(x, geom = "blank") +
      geom_line(aes(y = ..density.., colour = "Empirical"), stat = "density") +
      geom_histogram(aes(y = ..density..), alpha = 0.4, bins = 100) +
      scale_colour_manual(name = "Density", values = c("red", "blue")) +
      theme(legend.position = c(0.85, 0.85))
  }

  print(p0)
  invisible(p0)
}
|
||||
|
||||
## Source the shared utilities unless `wiki.list` is already present in this
## session.
## NOTE(review): presumably lib-00-utils.R is what defines `wiki.list` --
## confirm.
if (!exists("wiki.list")) {
  source(file = "lib-00-utils.R", echo = TRUE)
}
|
||||
|
||||
## Make sure both `bots` and `admins` are available.
## If either is missing, prefer the cached RDS files when both exist;
## otherwise run the script that generates the user roles.
## NOTE(review): presumably lib-01-generate_userroles.R defines `bots` and
## `admins` -- confirm.
if (!exists("bots") || !exists("admins")) {
  ## Scalar conditions: use the short-circuit operators, not the elementwise
  ## `|` / `&`.
  if (file.exists("bots.RDS") && file.exists("admins.RDS")) {
    bots <- readRDS("bots.RDS")
    admins <- readRDS("admins.RDS")
  } else {
    source("lib-01-generate_userroles.R", echo = TRUE)
  }
}
|
||||
|
||||
## Build the newcomer table when `newcomer.dt` is not in the session and at
## least one of the intermediate RDS files is missing.
## NOTE(review): when every intermediate file exists nothing is loaded here;
## presumably a later script reads them -- confirm.
if (!exists("newcomer.dt")) {
  intermediate.files <- list(
    "newcomers.RDS",
    "wikiweeks.RDS",
    "wiki.stats.RDS",
    "active.editors.RDS"
  )
  ## vapply() guarantees a logical vector regardless of the input.
  files.present <- vapply(intermediate.files, file.exists, logical(1))
  if (!all(files.present)) {
    source("lib-01-build_newcomer_table.R", echo = TRUE)
  }
}
|
||||
|
||||
## Optional diagnostics: set the flag to TRUE to write one distribution plot
## per model-1 variable via plot.distribution().
plot.distributions <- FALSE

if (plot.distributions) {
  library(ggplot2)

  ## variables of model 1
  outcome1 <- c("survives")

  predictors1 <- c(
    "is.reverted", "is.messaged", "is.bot.reverted", "is.reverted.messaged",
    "is.admin.reverted", "BRD.initiation", "BRD.reciprocation"
  )

  controls1 <- c(
    "ns0.edits", "ns1.edits", "ns4.edits", "n.other.wikis", "week",
    "has.edited.other.wikis", "n.edits.other", "n.messages", "n.editors",
    "total.wiki.length", "revert.rate", "revert.disc.rate",
    "newcomer.revert.disc.rate", "revert.message.rate",
    "newcomer.revert.message.rate", "newcomer.edits.rate", "bot.revert.rate",
    "bot.revert.prop", "newcomer.bot.revert.rate", "newcomer.bot.revert.prop",
    "admin.revert.rate", "admin.revert.prop", "n.ns4.edits", "n.ns4.editors",
    "d.ns4.length", "ns4.editor.age", "age", "wiki.age"
  )

  ## NOTE(review): `newcomers` is not defined in this file; presumably it is
  ## created by lib-01-build_newcomer_table.R -- confirm it exists when that
  ## script was skipped.
  for (varname in c(outcome1, predictors1, controls1)) {
    plot.distribution(newcomers, varname)
  }
}
|
||||
Reference in New Issue
Block a user