1
0

Initial commit

p#	new file:   runwikiq.sh
This commit is contained in:
2018-06-02 15:32:19 -07:00
commit 72633c193b
202 changed files with 21929 additions and 0 deletions

97
03_generate_plots.R Executable file
View File

@@ -0,0 +1,97 @@
#!/usr/bin/env Rscript
# Creates data for plotting
# Copyright (C) 2018 Nathan TeBlunthuis
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
library("ggplot2")
library("bootstrap")
library("scales")
source("lib-00-utils.R")
# Source the dataset-building script only when `newcomers` is not already
# present in the workspace.
# NOTE(review): `all.edits` (used below) is presumably created by the same
# script -- confirm; only `newcomers` is checked here.
if (!exists("newcomers")) {
  source("01_build_datasets.R")
}

# Record the earliest and latest edit timestamps found in all.edits.
# NOTE(review): remember() is defined outside this file; presumably it stores
# the value under the given name for later reporting -- confirm.
remember(min(all.edits$date.time), "earliest.data.point")
remember(max(all.edits$date.time), "latest.data.point")
# One row per (wiki, wiki-age period): mean and variance of the is.reverted
# and survives columns of `newcomers`, plus the number of rows in the cell.
p1.data <- newcomers[, .(
  p.reverted   = mean(is.reverted),
  var.reverted = var(is.reverted),
  p.survives   = mean(survives),
  var.survives = var(survives),
  N            = .N
), by = .(wiki.name, wiki.age.half.years)]

# Keep only cells built from more than one row (var() of a single value is NA).
p1.data <- p1.data[N > 1]

# Number of remaining rows (one per wiki) in each wiki-age period.
p1.data[, N.wikis := .N, by = .(wiki.age.half.years)]

## put p1 data onto sd scales
# Each wiki's proportions are divided by that wiki's own standard deviation
# across its periods (no centering). A wiki with a single remaining period
# yields NA here, and a wiki with constant proportions yields Inf/NaN.
p1.data[, `:=`(
  p.survives.in.sd = p.survives / sd(p.survives),
  p.reverted.in.sd = p.reverted / sd(p.reverted)
), by = .(wiki.name)]
# Long format: one row per wiki, wiki-age period and measure.
p.data <- melt(
  p1.data,
  id.vars = c("wiki.name", "wiki.age.half.years"),
  measure.vars = c(
    "p.survives", "p.reverted",
    "p.survives.in.sd", "p.reverted.in.sd"
  )
)

# Box-plot summary per wiki-age period and measure. boxplot.stats()$stats
# returns the lower whisker end, lower hinge, median, upper hinge and upper
# whisker end, so "min"/"max" are whisker ends (coef = 1.5), not necessarily
# the raw extremes. `mu` is the plain mean and `N.wikis` the row count.
# NOTE(review): mean(value) is NA whenever the group contains an NA, which
# can happen for the *.in.sd measures -- confirm this is intended.
p.stats <- p.data[, as.list(c(
  setNames(
    boxplot.stats(value, coef = 1.5)$stats,
    c("min", "q1", "med", "q3", "max")
  ),
  mu = mean(value),
  N.wikis = .N
)), by = .(wiki.age.half.years, variable)]
remember(p.stats)

# Relabel the two raw-proportion measures for display.
# NOTE(review): this replacement form assigns through a copy of p.stats;
# rewriting it with `:=` would modify the table by reference and could also
# alter the object handed to remember() above -- verify before changing.
p.stats[variable == "p.survives"]$variable <- "Survives"
p.stats[variable == "p.reverted"]$variable <- "Reverted"

# One-sided Spearman rank correlations against wiki age: "less" tests for a
# negative association (survival), "greater" for a positive one (reverts).
remember(
  cor.test(
    p1.data$wiki.age.half.years,
    p1.data$p.survives,
    method = "spearman",
    alternative = "less"
  ),
  "survives.cor.test"
)
remember(
  cor.test(
    p1.data$wiki.age.half.years,
    p1.data$p.reverted,
    method = "spearman",
    alternative = "greater"
  ),
  "reverted.cor.test"
)
# Axis labels "Year 0" ... "Year <max age>"; one label per integer from 0 to
# the largest wiki.age.half.years value in p.stats.
xlabels <- paste0("Year ", 0:max(p.stats$wiki.age.half.years))

# Box plots drawn from the precomputed summary columns (stat = "identity"),
# one panel per measure, with a dashed line through the medians.
# The line uses wiki.age.half.years + 1 as x because factor levels sit at
# positions 1, 2, ... on the discrete axis.
# NOTE(review): this alignment (and the label vector above) assumes ages run
# 0, 1, 2, ... without gaps -- confirm against the dataset.
p <- ggplot(
  p.stats,
  aes(
    x = as.factor(wiki.age.half.years),
    ymin = min,
    lower = q1,
    middle = med,
    upper = q3,
    ymax = max,
    width = 0.3
  )
) +
  geom_boxplot(stat = "identity") +
  geom_line(aes(x = wiki.age.half.years + 1, y = med), linetype = 2) +
  facet_wrap("variable", nrow = 2, strip.position = "bottom", scales = "free") +
  scale_y_continuous(name = "Proportion of newcomers", minor_breaks = NULL) +
  scale_x_discrete(name = "Wiki age", labels = xlabels) +
  theme_bw() +
  theme(legend.position = "None")

# Render to a 6x6 inch PDF. No file name is given, so pdf() falls back to its
# default output file (Rplots.pdf unless pdf.options() were changed).
pdf(width = 6, height = 6)
print(p)
dev.off()
# Edits per editor, per wiki, per month of wiki age.
active.editors <- all.edits[, .(
  N.edits = .N,
  wiki.age.years = first(wiki.age.years)
), by = .(wiki.name, editor, wiki.age.months)]

# Count editors with at least 5 edits in each wiki-month. Wiki-months in which
# no editor reaches 5 edits produce no row at all (they are not counted as 0).
n.active.editors <- active.editors[N.edits >= 5, .(
  N.active.editors = .N,
  wiki.age.years = first(wiki.age.years)
), by = .(wiki.name, wiki.age.months)]

# N: number of wikis that have a row at this wiki age.
n.active.editors[, N := .N, by = .(wiki.age.months)]

# Per-wiki reference values: the oldest observed age, the peak number of
# active editors, and the count expressed in units of the wiki's own standard
# deviation (NA when a wiki has a single row, Inf/NaN when the count is constant).
n.active.editors[, `:=`(
  max.age = max(wiki.age.months),
  max.active.editors = max(N.active.editors),
  sd.units.active.editors = N.active.editors / sd(N.active.editors)
), by = "wiki.name"]

# Active editors as a fraction of the wiki's own peak.
n.active.editors[, active.editors.pmax := N.active.editors / max.active.editors]
# Age cut-off: the 90th percentile of max.age.
# NOTE(review): the quantile is taken over the rows of n.active.editors (one
# per wiki-month), so wikis with more months carry more weight than in a
# per-wiki quantile -- confirm this is intended.
wiki.age.quantile <- 0.90
max.age.months <- quantile(n.active.editors$max.age, wiki.age.quantile)

# For every wiki age up to the cut-off, bootstrap the mean of the sd-scaled
# active-editor counts (5000 resamples), using only finite values.
# NOTE(review): no RNG seed is set in the visible code, so the intervals will
# differ between runs unless a seed is set elsewhere.
boot <- n.active.editors[
  is.finite(sd.units.active.editors) & wiki.age.months <= max.age.months,
  .(
    thetastar = bootstrap(
      x = sd.units.active.editors,
      nboot = 5000,
      mean
    )$thetastar
  ),
  by = .(wiki.age.months)
]

# Percentile interval (2.5% and 97.5%) of the bootstrap means per wiki age.
boot.ci <- boot[
  ,
  as.list(quantile(thetastar, probs = c(0.025, 0.975))),
  by = .(wiki.age.months)
]
names(boot.ci) <- c("wiki.age.months", "lower.ci", "upper.ci")
# Per wiki age (up to max.age.months, finite sd-scaled values only): mean
# sd-scaled and mean raw number of active editors, the wiki age in years
# taken from the first row of the group, and the number of wikis contributing.
plot2.data <- n.active.editors[
  is.finite(sd.units.active.editors) & wiki.age.months <= max.age.months,
  .(
    sd.units.active.editors = mean(sd.units.active.editors),
    N.active.editors = mean(N.active.editors),
    wiki.age.years = first(wiki.age.years),
    N.wikis = .N
  ),
  by = .(wiki.age.months)
]

# Attach the bootstrap confidence bounds by wiki age (update join).
# The `i.` prefix forces the values to come from boot.ci. Without it, the
# names would resolve to plot2.data's own columns whenever they already exist
# (e.g. when this line is re-run), turning the update into a silent no-op.
plot2.data[
  boot.ci,
  `:=`(lower.ci = i.lower.ci, upper.ci = i.upper.ci),
  on = "wiki.age.months"
]

# NOTE(review): remember() is defined outside this file; presumably it stores
# the table under the given name -- confirm.
remember(plot2.data, "plot.active.editors.dt")