Initial commit
p# new file: runwikiq.sh
This commit is contained in:
97
03_generate_plots.R
Executable file
97
03_generate_plots.R
Executable file
@@ -0,0 +1,97 @@
|
||||
#!/usr/bin/env Rscript
|
||||
|
||||
# Creates data for plotting
|
||||
|
||||
# Copyright (C) 2018 Nathan TeBlunthuis
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
library("ggplot2")
|
||||
library("bootstrap")
|
||||
|
||||
library("scales")
|
||||
source("lib-00-utils.R")
|
||||
## Build the datasets unless an earlier run already left them in the session.
## This script reads both `newcomers` and `all.edits`, so the guard checks for
## each of them; testing `newcomers` alone would skip the build (and fail
## later) in a session where only that object is present.
## NOTE(review): presumably 01_build_datasets.R defines both objects -- confirm.
if (!exists("newcomers") || !exists("all.edits")) {
    source("01_build_datasets.R")
}
|
||||
|
||||
## Record the smallest and largest date.time values found in all.edits.
remember(all.edits[, min(date.time)], "earliest.data.point")
remember(all.edits[, max(date.time)], "latest.data.point")
|
||||
|
||||
## Summarise `newcomers` per wiki and per half-year of wiki age: mean and
## variance of is.reverted and of survives, plus the row count N.
## Groups with a single row (N == 1) are dropped.
p1.data <- newcomers[,
                     .(p.reverted = mean(is.reverted),
                       var.reverted = var(is.reverted),
                       p.survives = mean(survives),
                       var.survives = (var(survives)),
                       N = .N),
                     by = .(wiki.name, wiki.age.half.years)][N > 1]

## Each remaining row is one wiki at one age, so the per-age row count is the
## number of wikis observed at that age.
p1.data[, N.wikis := .N, by = .(wiki.age.half.years)]
|
||||
## put p1 data onto sd scales: within each wiki, divide the proportions by the
## standard deviation of that wiki's own rows
p1.data[,
        `:=`(p.survives.in.sd = p.survives / sd(p.survives),
             p.reverted.in.sd = p.reverted / sd(p.reverted)),
        by = .(wiki.name)]
|
||||
|
||||
## Long format: one row per wiki, wiki age and measure.
p.data <- melt(p1.data,
               id.vars = c("wiki.name", "wiki.age.half.years"),
               measure.vars = c("p.survives", "p.reverted",
                                "p.survives.in.sd", "p.reverted.in.sd"))

## For every wiki age and measure, compute the five box-plot statistics
## (whiskers at 1.5 times the box length), the mean, and the number of rows.
## NOTE(review): boxplot.stats() omits NA values but mean(value) does not, so
## mu is NA for any group containing an NA -- confirm this is intended.
p.stats <- p.data[, {
    box <- boxplot.stats(value, coef = 1.5)$stats
    names(box) <- c("min", "q1", "med", "q3", "max")
    as.list(c(box, mu = mean(value), N.wikis = .N))
}, by = .(wiki.age.half.years, variable)]

remember(p.stats)
|
||||
## Replace the two raw-proportion measure names with display labels.
## NOTE(review): this replacement form assigns through a copy rather than
## updating in place with `:=`; presumably that keeps the object already
## passed to remember() unchanged -- confirm before converting it.
p.stats[variable == "p.survives"]$variable <- "Survives"
p.stats[variable == "p.reverted"]$variable <- "Reverted"
|
||||
|
||||
## One-sided Spearman rank correlations between wiki age and the per-wiki
## proportions: survival is tested for a negative association with age,
## reversion for a positive one.
remember(cor.test(p1.data$wiki.age.half.years,
                  p1.data$p.survives,
                  method = "spearman",
                  alternative = "less"),
         "survives.cor.test")

remember(cor.test(p1.data$wiki.age.half.years,
                  p1.data$p.reverted,
                  method = "spearman",
                  alternative = "greater"),
         "reverted.cor.test")
|
||||
|
||||
## One x-axis label per integer from 0 up to the largest age in p.stats.
## NOTE(review): labels read "Year" although the variable is named in
## half-years -- confirm the intended unit.
xlabels <- paste0("Year ", 0:max(p.stats$wiki.age.half.years))

## Box plots drawn from the precomputed statistics (stat = "identity"), with a
## dashed line through the medians, one facet per measure.
p <- ggplot(p.stats,
            aes(x = as.factor(wiki.age.half.years),
                ymin = min,
                lower = q1,
                middle = med,
                upper = q3,
                ymax = max,
                width = 0.3)) +
    geom_boxplot(stat = "identity") +
    geom_line(aes(x = wiki.age.half.years + 1, y = med), linetype = 2) +
    facet_wrap("variable", nrow = 2, strip.position = "bottom", scales = "free") +
    scale_y_continuous(name = "Proportion of newcomers", minor_breaks = NULL) +
    scale_x_discrete(name = "Wiki age", labels = xlabels) +
    theme_bw() +
    theme(legend.position = "None")

## No file name is passed, so pdf() writes to its default output file.
pdf(width = 6, height = 6)
print(p)
dev.off()
|
||||
|
||||
## Number of edits made by each editor in each wiki during each month of wiki
## age; wiki.age.years is carried along from the first row of the group.
active.editors <- all.edits[,
                            .(N.edits = .N,
                              wiki.age.years = first(wiki.age.years)),
                            by = .(wiki.name, editor, wiki.age.months)]

## Per wiki and month, count the editors with at least five edits that month.
n.active.editors <- active.editors[N.edits >= 5,
                                   .(N.active.editors = .N,
                                     wiki.age.years = first(wiki.age.years)),
                                   by = .(wiki.name, wiki.age.months)]
|
||||
|
||||
## N: number of rows (wikis with at least one active editor) at each month of
## wiki age.
n.active.editors[, N := .N, by = .(wiki.age.months)]

## Per-wiki reference values: the last observed month, the peak number of
## active editors, and the active-editor count divided by that wiki's own
## standard deviation.
n.active.editors[,
                 `:=`(max.age = max(wiki.age.months),
                      max.active.editors = max(N.active.editors),
                      sd.units.active.editors =
                          N.active.editors / sd(N.active.editors)),
                 by = "wiki.name"]

## Active editors as a fraction of the wiki's peak.
n.active.editors[, active.editors.pmax := N.active.editors / max.active.editors]

## Age cutoff: the 90th percentile of max.age, taken over the rows of
## n.active.editors (one row per wiki-month, not one per wiki).
wiki.age.quantile <- .90
max.age.months <- quantile(n.active.editors$max.age, wiki.age.quantile)
|
||||
|
||||
## For each month of wiki age up to the cutoff, bootstrap the mean of the
## sd-scaled active-editor counts (5000 resamples), using finite values only.
## NOTE(review): no RNG seed is set in this block, so the replicates differ
## between runs unless a seed is set elsewhere -- confirm.
boot <- n.active.editors[
    is.finite(sd.units.active.editors) & wiki.age.months <= max.age.months,
    .(thetastar = bootstrap(x = sd.units.active.editors,
                            nboot = 5000,
                            mean)$thetastar),
    by = .(wiki.age.months)]

## 95% percentile interval of the bootstrap replicates for each month.
boot.ci <- boot[, {
    ci <- quantile(thetastar, probs = c(0.025, 0.975), names = FALSE)
    .(lower.ci = ci[1], upper.ci = ci[2])
}, by = .(wiki.age.months)]
|
||||
|
||||
## Per month of wiki age (same row filter as the bootstrap): mean sd-scaled and
## mean raw active-editor counts, the wiki age in years, and the number of
## wikis contributing.
plot2.data <- n.active.editors[
    is.finite(sd.units.active.editors) & wiki.age.months <= max.age.months,
    .(sd.units.active.editors = mean(sd.units.active.editors),
      N.active.editors = mean(N.active.editors),
      wiki.age.years = first(wiki.age.years),
      N.wikis = .N),
    by = .(wiki.age.months)]

## Attach the bootstrap interval bounds by matching on wiki.age.months.
plot2.data[boot.ci,
           `:=`(lower.ci = i.lower.ci, upper.ci = i.upper.ci),
           on = "wiki.age.months"]

remember(plot2.data, "plot.active.editors.dt")
|
||||
Reference in New Issue
Block a user