1
0
Files
rises_declines_wikia_code/04_model_namespace4.R
groceryheist 72633c193b Initial commit
p#	new file:   runwikiq.sh
2018-06-02 15:32:19 -07:00

150 lines
6.8 KiB
R
Executable File

#!/usr/bin/env Rscript
# Fits models predicting reversions of namespace 4 edits
# Copyright (C) 2018 Nathan TeBlunthuis
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
library(effects)
library(texreg)
library(lme4)
# Build the shared datasets only when this session does not have them yet;
# the presence of `newcomers` is the marker that the build script has run.
if (!exists("newcomers")) {
  source("01_build_datasets.R")
}

# Run-time switches for this script.
nosave <- FALSE
sample <- FALSE

# Pick the namespace 4 edits to model: the full regression dataset by
# default, or a sampled dataset (with its `weight` column kept in `weights`)
# when `sample` is switched on.
if (!sample) {
  ns4.ds <- ns4.reg.edits
} else {
  # sample.ns4.edits() is presumably defined by the sourced helper file.
  source("lib-01-sample-datasets.R")
  ns4.ds <- sample.ns4.edits()
  weights <- ns4.ds$weight
}
# Derive the model covariates in place:
#   wiki.age.log / age.log : log1p of the ages expressed in years
#   wiki.age / age         : the ages converted to plain doubles (years)
#   quarter                : factor labelled "<year>_<quarter>"
ns4.ds <- ns4.ds[, ":="(
  wiki.age.log = log1p(as.double(wiki.age, units = "years")),
  age.log = log1p(as.double(age, units = "years")),
  wiki.age = as.double(wiki.age, units = "years"),
  # A calendar quarter spans 3 months, so months 1-12 map to quarters 1-4
  # with ceiling(month / 3). Dividing by 4 would yield only three 4-month
  # bins (1-3), which does not match the variable's meaning.
  quarter = as.factor(paste0(year(date.time), "_", ceiling(month(date.time) / 3))),
  age = as.double(age, units = "years")
)]

# Earliest `time.first.edit` recorded for each editor across all rows.
ns4.ds <- ns4.ds[, ":="(time.first.wikia.edit = min(time.first.edit)), by = .(editor)]

# Keep a handle on the unfiltered data for the "all newcomers" model.
ns4.ds.all.newcomers <- ns4.ds

# Main analysis set: rows whose `time.first.edit` equals the editor's overall
# earliest first edit (presumably edits on the editor's first wiki).
ns4.ds <- ns4.ds[time.first.wikia.edit == time.first.edit]
# Descriptive statistics of the modelled namespace 4 edits: revert rate and
# variance, plus mean / variance / median of editor age and wiki age.
ns4.summary.stats <- list(
  p.reverted = mean(ns4.ds$reverted),
  var.reverted = var(ns4.ds$reverted),
  mean.editor.age = mean(ns4.ds$age),
  var.editor.age = var(ns4.ds$age),
  median.editor.age = median(ns4.ds$age),
  mean.wiki.age = mean(ns4.ds$wiki.age),
  var.wiki.age = var(ns4.ds$wiki.age),
  median.wiki.age = median(ns4.ds$wiki.age)
)

# Hand the list to the project's remember() helper (defined elsewhere).
remember(ns4.summary.stats)
# Baseline logistic regression of `reverted` on editor age, wiki age,
# quarter and per-wiki fixed effects, fit on the filtered dataset.
print("fit morgan model")
f.morgan <- as.formula("reverted ~ age + wiki.age + quarter + wiki.name")
morgan.model <- glm(
  f.morgan,
  family = binomial(link = "logit"),
  data = ns4.ds
)

# Persist the full fit to disk and remember its texreg extract.
saveRDS(morgan.model, file = "morgan.model.RDS")
remember(extract(morgan.model), "morgan.model", silent = TRUE)
# Same specification as the baseline model, but with observation weights
# chosen so that every wiki carries the same total weight.
print("fit morgan model weights")
f.morgan <- as.formula("reverted ~ age + wiki.age + quarter + wiki.name")

# Each wiki gets nrow / n_wikis total weight, split evenly over its .N rows,
# so the weights sum to the number of rows overall.
n.total.wikis <- length(unique(ns4.ds$wiki.name))
weight.per.wiki <- nrow(ns4.ds) / n.total.wikis
ns4.ds <- ns4.ds[, weights := weight.per.wiki / .N, by = wiki.name]

morgan.model.weighted <- glm(
  f.morgan,
  family = binomial(link = "logit"),
  data = ns4.ds,
  weights = ns4.ds$weights
)

saveRDS(morgan.model.weighted, file = "morgan.model.weighted.RDS")
remember(extract(morgan.model.weighted), "morgan.model.weighted", silent = TRUE)
# Second weighted fit: same specification and weighting scheme as the
# weighted model, restricted to wikis with at least `min.edits` rows.
# The progress message is distinct from the first weighted fit so the two
# sections can be told apart in the log.
print("fit morgan model weights 2")
f.morgan <- as.formula("reverted ~ age + wiki.age + quarter + wiki.name")

# Rows per wiki, used for the minimum-edits filter below.
ns4.ds <- ns4.ds[, N := .N, by = wiki.name]

# Remember the unfiltered dataset so it can be restored after this fit.
ns4.ds.temp <- ns4.ds
min.edits <- 10

# Share of wikis dropped by the filter; printed and stored via remember().
remember(
  print(1 - length(unique(ns4.ds[N >= min.edits]$wiki.name)) / length(unique(ns4.ds$wiki.name))),
  "p.wikis.removed.weighted2"
)

# Remove wikis with fewer than `min.edits` rows (the removed share is the
# value printed above).
ns4.ds <- ns4.ds[N >= min.edits]

# Recompute the equal-per-wiki weights on the reduced dataset.
n.total.wikis <- length(unique(ns4.ds$wiki.name))
weight.per.wiki <- nrow(ns4.ds) / n.total.wikis
ns4.ds <- ns4.ds[, weights := weight.per.wiki / .N, by = .(wiki.name)]

morgan.model.weighted2 <- glm(f.morgan, data = ns4.ds, family = binomial(link = "logit"), weights = ns4.ds$weights)
saveRDS(morgan.model.weighted2, "morgan.model.weighted2.RDS")
remember(extract(morgan.model.weighted2), "morgan.model.weighted2", silent = TRUE)

# Restore the unfiltered dataset for the sections that follow.
ns4.ds <- ns4.ds.temp
# Unweighted fit of the same formula on `ns4.ds.all.newcomers`, the dataset
# captured before the first-wiki filter was applied.
print("fit morgan model all newcomers")
morgan.model.all.newcomers <- glm(
  f.morgan,
  family = binomial(link = "logit"),
  data = ns4.ds.all.newcomers
)
saveRDS(morgan.model.all.newcomers, file = "morgan.model.all.newcomers.RDS")
remember(
  extract(morgan.model.all.newcomers),
  "morgan.model.all.newcomers",
  silent = TRUE
)
# Random-intercept logistic model of `reverted` with wiki as the grouping
# factor and no fixed intercept; used to compute an intraclass correlation.
print("fitting RE model")
re.icc.reverted.model <- glmer(
  as.formula("reverted ~ + (1 | wiki.name) -1 "),
  family = binomial(link = logit),
  data = ns4.ds
)
saveRDS(re.icc.reverted.model, file = "re.icc.reverted.model.RDS")

# Variance components as a table; pull the wiki-level intercept variance.
varcorrmat <- as.data.table(VarCorr(re.icc.reverted.model))
wiki.var <- varcorrmat[grp == "wiki.name" & var1 == "(Intercept)"]$vcov

# Within-group term: variance of the fitted model's residuals.
# NOTE(review): this pairs a random-intercept variance with the variance of
# residuals() output; confirm both are on the intended scale (a latent-scale
# logistic ICC would conventionally use pi^2 / 3 here).
group.var <- var(residuals(re.icc.reverted.model))

# Share of total variance attributable to differences between wikis.
icc <- wiki.var / (group.var + wiki.var)

remember(varcorrmat, "icc.reverted.varcorrmat")
remember(group.var, "icc.reverted.group.var")
remember(icc, "icc.reverted")
## print("fit morgan model sample")
## sample.size <- 30
## ns4.ds <- ns4.ds[,in.sample:=(.N >= sample.size),by=wiki.name]
## # DT[,.SD[sample(.N, min(3,.N))],by = a]
## ns4.ds.equal.sample <- ns4.ds[,.SD[sample(.N,min(sample.size,.N))], by=wiki.name]
## morgan.model.sampled <- glm(f.morgan,data=ns4.ds.equal.sample,family=binomial(link='logit'))
## saveRDS(morgan.model.sampled,"morgan.model.sampled.RDS")
## remember(extract(morgan.model.sampled),"morgan.model.sampled",silent=TRUE)
## ns4.model2.formula <- as.formula("reverted ~ age.log + wiki.age + quarter")
## ns4.model2 <- glm(ns4.model2,data=ns4.ds,family=binomial(link='logit'),weights=weights)
## remember(extract(ns4.model2),"ns4.model2")
## print('fit morgan no pooling model')
## f.morgan <- as.formula("reverted ~ age.log + wiki.age + quarter + wiki.name:age.log + wiki.name:wiki.age")
## morgan.model <- glm(f.morgan,data=ns4.ds,family=binomial(link='logit'),weights=weights)
## remember(extract(morgan.model),"morgan.model")
## re.ns4.model <- glmer(as.formula("reverted ~ age.log + wiki.age + quarter | wiki.name"),data=ns4.ds,family=binomial(link='logit'),weights=weights)
## remember(extract(re.ns4.model),'re.ns4.model')
## print('fit morgan.robustness.1 model')
## f.morgan.robustness.1 <- as.formula("reverted ~ age.log + wiki.age + quarter + wiki.name")
## ns4.reg.edits.robustness <- build.namespace4.dataset(all.edits[p.reverted < 0.5])
## ns4.reg.edits.robustness[,":="(wiki.age.log = log1p(as.double(wiki.age,units="weeks")),
## age.log = log1p(as.double(age,units="weeks")),
## wiki.age = as.double(wiki.age,units='weeks'),
## quarter = as.factor(paste0(year(date.time),"_",ceiling(month(date.time)/4))))]
## morgan.robustness.1.model <- glm(f.morgan.robustness.1,data=pns4.reg.edits.robustness,family=binomial(link='logit'),weights=weights)
## saveRDS(morgan.robustness.1.model,"morgan.robustness.1.model.RDS")
## remember(extract(morgan.robustness.1.model),"morgan.robustness.1.model")
## ns4.ds[,":="(wiki.age.log = log1p(as.numeric(wiki.age,units="weeks")), age.log = log1p(as.numeric(age,units="weeks")))]
## f.ns4.2 <- as.formula("reverted ~ age.log + wiki.age.log + age.log|wiki.age.log + wiki.name")
## ns4.2.model <- glm(f.morgan,data=ns4.ds,family=binomial(link='logit'),weights=weights)
## remember(extract(ns4.2.model))
## summary statistics for namespace 4 edits