150 lines
6.8 KiB
R
Executable File
150 lines
6.8 KiB
R
Executable File
#!/usr/bin/env Rscript
|
|
|
|
# Fits models predicting reversions of namespace 4 edits
|
|
# Copyright (C) 2018 Nathan TeBlunthuis
|
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
library(effects)
|
|
library(texreg)
|
|
library(lme4)
|
|
# Build the datasets only when they are not already present in the session.
# NOTE(review): `newcomers` is presumably created by 01_build_datasets.R and
# used here as the "already loaded" marker -- confirm.
if (!exists("newcomers")) {
  source("01_build_datasets.R")
}

# Run-time switches. `nosave` is not referenced in the visible part of this
# script; `sample` selects between the sampled and the full regression data.
nosave <- FALSE
sample <- FALSE

# Pick the namespace 4 edits to model.
if (sample) {
  # Sampled dataset; the sampler's per-row weights are kept in `weights`.
  source("lib-01-sample-datasets.R")
  ns4.ds <- sample.ns4.edits()
  weights <- ns4.ds$weight
} else {
  # Full regression dataset.
  ns4.ds <- ns4.reg.edits
}
|
|
|
|
|
|
# Derive the model covariates on ns4.ds:
#   wiki.age.log, age.log : log1p of the ages expressed in years
#   wiki.age, age         : the ages converted to plain doubles, in years
#   quarter               : factor "<year>_<quarter>" of the edit timestamp
# NOTE(review): `wiki.age` and `age` are presumably difftime columns, hence the
# `units` argument -- confirm against 01_build_datasets.R.
#
# Fix: the quarter index used to be ceiling(month / 4), which yields only three
# four-month bins (1..3). Calendar quarters are three months long, so the
# divisor is 3 (months 1-3 -> 1, 4-6 -> 2, 7-9 -> 3, 10-12 -> 4).
ns4.ds <- ns4.ds[, `:=`(
  wiki.age.log = log1p(as.double(wiki.age, units = "years")),
  age.log = log1p(as.double(age, units = "years")),
  wiki.age = as.double(wiki.age, units = 'years'),
  quarter = as.factor(paste0(year(date.time), "_", ceiling(month(date.time) / 3))),
  age = as.double(age, units = 'years')
)]
|
|
|
|
# For every editor, the earliest `time.first.edit` over all of that editor's
# rows.
ns4.ds <- ns4.ds[, time.first.wikia.edit := min(time.first.edit), by = editor]

# Keep a handle on the unfiltered data for the "all newcomers" model.
ns4.ds.all.newcomers <- ns4.ds

# Keep only the rows whose first-edit time equals the editor's earliest one.
ns4.ds <- ns4.ds[time.first.edit == time.first.wikia.edit]
|
|
|
|
# Descriptive statistics of the filtered namespace 4 edits: share and variance
# of reverted edits, plus mean / variance / median of editor age and wiki age.
ns4.summary.stats <- list(
  p.reverted = mean(ns4.ds$reverted),
  var.reverted = var(ns4.ds$reverted),
  mean.editor.age = mean(ns4.ds$age),
  var.editor.age = var(ns4.ds$age),
  median.editor.age = median(ns4.ds$age),
  mean.wiki.age = mean(ns4.ds$wiki.age),
  var.wiki.age = var(ns4.ds$wiki.age),
  median.wiki.age = median(ns4.ds$wiki.age)
)

# Hand the statistics to the project's `remember` helper.
remember(ns4.summary.stats)
|
|
|
|
# Baseline logistic regression of `reverted` on editor age, wiki age, the
# quarter factor and a per-wiki term, fit on the filtered ns4.ds.
print('fit morgan model')
f.morgan <- as.formula("reverted ~ age + wiki.age + quarter + wiki.name")
morgan.model <- glm(
  formula = f.morgan,
  family = binomial(link = 'logit'),
  data = ns4.ds
)

# Persist the fitted model and remember its texreg summary.
saveRDS(morgan.model, file = "morgan.model.RDS")
remember(extract(morgan.model), "morgan.model", silent = TRUE)
|
|
|
|
# Same specification as the baseline model, but with row weights chosen so
# that the weights of every wiki sum to the same total.
print('fit morgan model weights')
f.morgan <- as.formula("reverted ~ age + wiki.age + quarter + wiki.name")

# Average number of rows per wiki; this is the total weight given to each wiki.
n.total.wikis <- length(unique(ns4.ds$wiki.name))
weight.per.wiki <- nrow(ns4.ds) / n.total.wikis

# Each row of a wiki gets an equal share of that wiki's total weight.
ns4.ds <- ns4.ds[, weights := weight.per.wiki / .N, by = wiki.name]

morgan.model.weighted <- glm(
  formula = f.morgan,
  family = binomial(link = 'logit'),
  data = ns4.ds,
  weights = ns4.ds$weights
)

# Persist the fitted model and remember its texreg summary.
saveRDS(morgan.model.weighted, file = "morgan.model.weighted.RDS")
remember(extract(morgan.model.weighted), "morgan.model.weighted", silent = TRUE)
|
|
|
|
# Weighted model again, this time restricted to wikis with at least
# `min.edits` rows.
print('fit morgan model weights')
f.morgan <- as.formula("reverted ~ age + wiki.age + quarter + wiki.name")

# N = number of rows per wiki.
ns4.ds <- ns4.ds[, N := .N, by = wiki.name]

# Stash the full table: ns4.ds is replaced by the filtered subset below and
# restored at the end of this section.
ns4.ds.temp <- ns4.ds
min.edits <- 10

# Print and remember the share of wikis dropped by the filter.
remember(
  print(1 - length(unique(ns4.ds[N >= min.edits]$wiki.name)) /
          length(unique(ns4.ds$wiki.name))),
  "p.wikis.removed.weighted2"
)

# Drop the wikis below the threshold (originally noted as the bottom 24.1% of
# wikis).
ns4.ds <- ns4.ds[N >= min.edits]

# Recompute the per-wiki weights on the filtered rows.
n.total.wikis <- length(unique(ns4.ds$wiki.name))
weight.per.wiki <- nrow(ns4.ds) / n.total.wikis
ns4.ds <- ns4.ds[, weights := weight.per.wiki / .N, by = wiki.name]

morgan.model.weighted2 <- glm(
  formula = f.morgan,
  family = binomial(link = 'logit'),
  data = ns4.ds,
  weights = ns4.ds$weights
)

# Persist the fitted model and remember its texreg summary.
saveRDS(morgan.model.weighted2, file = "morgan.model.weighted2.RDS")
remember(extract(morgan.model.weighted2), "morgan.model.weighted2", silent = TRUE)

# Restore the unfiltered table for the sections that follow.
ns4.ds <- ns4.ds.temp
|
|
|
|
# Baseline specification (f.morgan, defined earlier in this script) fit on the
# table saved before the first-wiki filter was applied.
print('fit morgan model all newcomers')
morgan.model.all.newcomers <- glm(
  formula = f.morgan,
  family = binomial(link = 'logit'),
  data = ns4.ds.all.newcomers
)

# Persist the fitted model and remember its texreg summary.
saveRDS(morgan.model.all.newcomers, file = "morgan.model.all.newcomers.RDS")
remember(
  extract(morgan.model.all.newcomers),
  "morgan.model.all.newcomers",
  silent = TRUE
)
|
|
|
|
# Random-intercept logistic model of `reverted` grouped by wiki, used to
# compute an intraclass correlation (ICC) for reverts.
print('fitting RE model')

re.icc.reverted.model <- glmer(
  as.formula("reverted ~ + (1 | wiki.name) -1 "),
  data = ns4.ds,
  family = binomial(link = logit)
)
saveRDS(re.icc.reverted.model, file = "re.icc.reverted.model.RDS")

# Variance components as a table; pick the wiki-level intercept variance.
varcorrmat <- as.data.table(VarCorr(re.icc.reverted.model))
wiki.var <- varcorrmat[grp == 'wiki.name' & var1 == "(Intercept)", vcov]

# NOTE(review): the within-group variance is taken as the variance of the
# model residuals; confirm this is the intended ICC definition for a
# logistic model.
group.var <- var(residuals(re.icc.reverted.model))

# ICC = between-wiki variance / (between-wiki + within-group variance).
icc <- wiki.var / (wiki.var + group.var)

remember(varcorrmat, 'icc.reverted.varcorrmat')
remember(group.var, 'icc.reverted.group.var')
remember(icc, 'icc.reverted')
|
|
|
|
## print("fit morgan model sample")
|
|
## sample.size <- 30
|
|
## ns4.ds <- ns4.ds[,in.sample:=(.N >= sample.size),by=wiki.name]
|
|
## # DT[,.SD[sample(.N, min(3,.N))],by = a]
|
|
## ns4.ds.equal.sample <- ns4.ds[,.SD[sample(.N,min(sample.size,.N))], by=wiki.name]
|
|
## morgan.model.sampled <- glm(f.morgan,data=ns4.ds.equal.sample,family=binomial(link='logit'))
|
|
## saveRDS(morgan.model.sampled,"morgan.model.sampled.RDS")
|
|
## remember(extract(morgan.model.sampled),"morgan.model.sampled",silent=TRUE)
|
|
|
|
## ns4.model2.formula <- as.formula("reverted ~ age.log + wiki.age + quarter")
|
|
## ns4.model2 <- glm(ns4.model2,data=ns4.ds,family=binomial(link='logit'),weights=weights)
|
|
## remember(extract(ns4.model2),"ns4.model2")
|
|
|
|
## print('fit morgan no pooling model')
|
|
## f.morgan <- as.formula("reverted ~ age.log + wiki.age + quarter + wiki.name:age.log + wiki.name:wiki.age")
|
|
## morgan.model <- glm(f.morgan,data=ns4.ds,family=binomial(link='logit'),weights=weights)
|
|
## remember(extract(morgan.model),"morgan.model")
|
|
|
|
## re.ns4.model <- glmer(as.formula("reverted ~ age.log + wiki.age + quarter | wiki.name"),data=ns4.ds,family=binomial(link='logit'),weights=weights)
|
|
|
|
## remember(extract(re.ns4.model),'re.ns4.model')
|
|
|
|
## print('fit morgan.robustness.1 model')
|
|
## f.morgan.robustness.1 <- as.formula("reverted ~ age.log + wiki.age + quarter + wiki.name")
|
|
## ns4.reg.edits.robustness <- build.namespace4.dataset(all.edits[p.reverted < 0.5])
|
|
|
|
## ns4.reg.edits.robustness[,":="(wiki.age.log = log1p(as.double(wiki.age,units="weeks")),
|
|
## age.log = log1p(as.double(age,units="weeks")),
|
|
## wiki.age = as.double(wiki.age,units='weeks'),
|
|
## quarter = as.factor(paste0(year(date.time),"_",ceiling(month(date.time)/4))))]
|
|
|
|
## morgan.robustness.1.model <- glm(f.morgan.robustness.1,data=pns4.reg.edits.robustness,family=binomial(link='logit'),weights=weights)
|
|
## saveRDS(morgan.robustness.1.model,"morgan.robustness.1.model.RDS")
|
|
## remember(extract(morgan.robustness.1.model),"morgan.robustness.1.model")
|
|
|
|
|
|
## ns4.ds[,":="(wiki.age.log = log1p(as.numeric(wiki.age,units="weeks")), age.log = log1p(as.numeric(age,units="weeks")))]
|
|
## f.ns4.2 <- as.formula("reverted ~ age.log + wiki.age.log + age.log|wiki.age.log + wiki.name")
|
|
## ns4.2.model <- glm(f.morgan,data=ns4.ds,family=binomial(link='logit'),weights=weights)
|
|
## remember(extract(ns4.2.model))
|
|
|
|
## summary statistics for namespace 4 edits
|