1
0

Update on Overleaf.

This commit is contained in:
2023-02-22 19:00:30 +00:00
committed by node
commit c5e0a01713
133 changed files with 24196 additions and 0 deletions

View File

@@ -0,0 +1,266 @@
library(data.table)
library(ggplot2)
source('resources/functions.R')
## Robustness check 1 (IV perspective): compare the MLE fit with no Z in
## the error model against the baseline example-2 results.
## iv: independent variable whose estimates are plotted.
## Side effect: draws the assembled grob on the current graphics device.
plot.robustness.1 <- function(iv='x'){
## robustness check 1 test g
r <- readRDS('robustness_1.RDS')
baseline_df <- readRDS('remembr.RDS')[['plot.df.example.2']]
robust_df <- data.table(r$robustness_1)
## just compare the mle methods in the two examples
## keep only runs where Z has nonzero effects on both Y and X
robust_df <- robust_df[Bzy!=0]
robust_df <- robust_df[Bzx!=0]
## normalize the 'true' label so both frames agree
baseline_df[method=='true', method:='True']
robust_df[method=='true', method:='True']
baseline_df <- baseline_df[(method=='mle') | (method=='True') | (method=='naive')]
robust_df <- robust_df[(method=='mle') | (method=='True')]
## relabel the two MLE variants so they are distinguishable in the legend
baseline_df[method=='mle',method:='MLE Reported']
robust_df[method=='mle',method:='No Z in Error Model']
df <- rbind(baseline_df, robust_df, fill=TRUE)
df[method=='naive', method:='Naive']
## restrict to a subset of sample-size design cells
df <- df[(N %in% c(1000,5000)) & (m %in% c(200,100))]
p <- plot.simulation(df,iv=iv,levels=c('MLE Reported','No Z in Error Model', 'Naive', 'True'))
grid.draw(p)
}
## Robustness check 1, assumption-holds variant: same comparison as
## plot.robustness.1 but restricted to runs where Bzy == 0, i.e. where the
## omitted Z genuinely has no effect on Y.
## Side effect: draws the assembled grob on the current graphics device.
plot.robustness.1.checkassumption <- function(iv='x'){
## robustness check 1 test g
r <- readRDS('robustness_1.RDS')
baseline_df <- readRDS('remembr.RDS')[['plot.df.example.2']]
robust_df <- data.table(r$robustness_1)
## just compare the mle methods in the two examples
## Bzy==0: the no-Z-effect assumption holds in these runs
robust_df <- robust_df[Bzy==0]
robust_df <- robust_df[Bzx!=0]
baseline_df[method=='true', method:='True']
robust_df[method=='true', method:='True']
## note: unlike plot.robustness.1, the baseline keeps no 'True' rows here;
## the True reference comes from robust_df only
baseline_df <- baseline_df[(method=='mle') | (method=='naive')]
robust_df <- robust_df[(method=='mle') | (method=='True')]
baseline_df[method=='mle',method:='MLE Reported']
robust_df[method=='mle',method:='No Z in Error Model']
df <- rbind(baseline_df, robust_df, fill=TRUE)
df[method=='naive', method:='Naive']
df <- df[(N %in% c(1000,5000)) & (m %in% c(200,100))]
p <- plot.simulation(df,iv=iv,levels=c('MLE Reported','No Z in Error Model', 'Naive', 'True'))
grid.draw(p)
}
## Robustness check 1 (DV perspective): compare the DV-side MLE with no Z
## in the error model against the baseline example-4 results.
## iv: variable whose estimates are plotted (defaults to 'z' here).
## Side effect: draws the assembled grob on the current graphics device.
plot.robustness.1.dv <- function(iv='z'){
## robustness check 1 test g
r <- readRDS('robustness_1_dv.RDS')
baseline_df <- readRDS('remembr.RDS')[['plot.df.example.4']]
robust_df <- data.table(r$robustness_1_dv)
## just compare the mle methods in the two examples
baseline_df[method=='true', method:='True']
robust_df[method=='true', method:='True']
## keep only runs with nonzero effects of X and Z on Y
robust_df <- robust_df[Bxy!=0]
robust_df <- robust_df[Bzy!=0]
# robust_df <- robust_df[Bzx==-0.1]
baseline_df <- baseline_df[(method=='mle') | (method=='True') | (method=='naive')]
robust_df <- robust_df[(method=='mle') | (method=='True')]
baseline_df[method=='mle',method:='MLE Reported']
robust_df[method=='mle',method:='No Z in Error Model']
df <- rbind(baseline_df, robust_df, fill=TRUE)
df <- df[(N %in% c(1000,5000)) & (m %in% c(200,100))]
df[method=='naive', method:='Naive']
p <- plot.simulation(df,iv=iv,levels=c('MLE Reported','No Z in Error Model','Naive', 'True'))
grid.draw(p)
}
## Robustness check 2 (IV perspective): estimates faceted by the accuracy
## of the automatic classifier, restricted to one (m, N) design cell.
## Side effect: draws the assembled grob on the current graphics device.
plot.robustness.2.iv <- function(iv, n.annotations=100, n.classifications=5000){
result <- readRDS("robustness_2.RDS")
sim_df <- data.table(result[['robustness_2']])
sim_df <- sim_df[(m == n.annotations) & (N == n.classifications)]
## map raw method codes to display labels, then drop the feasible fit
method.labels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","gmm"="GMM", "mle"="MLE", "zhang"="PL","feasible"="Feasible")
sim_df[, method := method.labels[method]]
sim_df <- sim_df[method != "Feasible"]
plt <- .plot.simulation(sim_df, iv=iv, levels=c("True","Naïve","MI", "GMM", "MLE", "PL", "Feasible"))
plt <- plt + facet_wrap(prediction_accuracy~., ncol=4,as.table=F)
plt <- plt + coord_flip() + scale_x_discrete(labels=label_wrap_gen(14)) + ylab("Estimate") + xlab("Method")
plt <- arrangeGrob(plt, top=grid.text("AC Accuracy",x=0.32,just='right'))
grid.draw(plt)
}
## Module-level summary of the robustness-2 DV results: the range of
## classifier accuracies swept in the simulation, computed once at load time.
robust2 <- readRDS("robustness_2_dv.RDS")
robust_2_df <- data.table(robust2[['robustness_2_dv']])
robust_2_min_acc <- min(robust_2_df[,prediction_accuracy])
robust_2_max_acc <- max(robust_2_df[,prediction_accuracy])
## Robustness check 2 (DV perspective): estimates faceted by the accuracy
## of the automatic classifier, restricted to one (m, N) design cell.
## Side effect: draws the assembled grob on the current graphics device.
plot.robustness.2.dv <- function(iv, n.annotations=100, n.classifications=5000){
result <- readRDS("robustness_2_dv.RDS")
sim_df <- data.table(result[['robustness_2_dv']])
## temporary workaround for a bug in the makefile (kept for reference):
## if('Px' %in% names(sim_df)) sim_df <- sim_df[is.na(Px)]
sim_df <- sim_df[(m == n.annotations) & (N == n.classifications)]
## map raw method codes to display labels, then drop the feasible fit
method.labels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","gmm"="GMM", "mle"="MLE", "zhang"="PL","feasible"="Feasible")
sim_df[, method := method.labels[method]]
sim_df <- sim_df[method != "Feasible"]
plt <- .plot.simulation(sim_df, iv=iv, levels=c("True","Naïve","MI", "GMM", "MLE", "PL", "Feasible"))
plt <- plt + facet_wrap(prediction_accuracy~., ncol=4,as.table=F)
plt <- plt + coord_flip() + scale_x_discrete(labels=label_wrap_gen(14)) + ylab("Estimate") + xlab("Method")
plt <- arrangeGrob(plt, top=grid.text("AC Accuracy",x=0.32,just='right'))
grid.draw(plt)
}
## Robustness check 3 (IV perspective): compare the Fischer-information MLE
## approximation against the profile-likelihood MLE, faceted by P(X).
## BUG FIX: the original ran `df <- df[(m==...) & (N==...)]` before `df`
## existed (it is only created by the rbind below), which errors at
## runtime; the (m, N) filter is now applied to robust_df instead, mirroring
## the filter already applied to robust_df_proflik.
## NOTE(review): 'Fischer' looks like a typo of 'Fisher' but is kept, since
## it must match the label in the levels passed to .plot.simulation.
plot.robustness.3.iv <- function(iv, n.annotations=100, n.classifications=5000){
r <- readRDS('robustness_3.RDS')
robust_df <- data.table(r[['robustness_3']])
r2 <- readRDS('robustness_3_proflik.RDS')
robust_df_proflik <- data.table(r2[['robustness_3_proflik']])
new.levels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","gmm"="GMM", "mle"="MLE", "zhang"="PL","feasible"="Feasible")
## restrict the approximation runs to the requested design cell
robust_df <- robust_df[(m==n.annotations) & (N==n.classifications)]
robust_df <- robust_df[,method := new.levels[method]]
robust_df <- robust_df[method != "Feasible"]
robust_df <- robust_df[method=='MLE',method:='Fischer approximation']
robust_df_proflik <- robust_df_proflik[(m==n.annotations) & (N==n.classifications)]
robust_df_proflik <- robust_df_proflik[,method := new.levels[method]]
robust_df_proflik <- robust_df_proflik[method=='MLE',method:='Profile likelihood']
robust_df_proflik <- robust_df_proflik[method != "Feasible"]
df <- rbind(robust_df, robust_df_proflik)
p <- .plot.simulation(df, iv=iv, levels=c("True","Naïve","MI", "GMM", "Profile likelihood","Fischer approximation", "PL", "Feasible"))
p <- p + facet_wrap(Px~., ncol=3,as.table=F)
p <- p + scale_x_discrete(labels=label_wrap_gen(14)) + ylab("Estimate") + xlab("Method") + coord_flip()
p <- arrangeGrob(p,
top=grid.text("P(X)",x=0.32,just='right'))
grid.draw(p)
}
## Robustness check 3 (DV perspective): estimates faceted by B0 (P(Y)).
## NOTE(review): new.levels here has no "gmm" entry, yet the levels passed
## to .plot.simulation include "GMM" — confirm whether GMM rows exist in
## this RDS; unmapped codes would become NA labels.
plot.robustness.3.dv <- function(iv, n.annotations=100, n.classifications=1000){
r <- readRDS('robustness_3_dv.RDS')
robust_df <- data.table(r[['robustness_3_dv']])
new.levels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","mle"="MLE", "zhang"="PL","feasible"="Feasible")
robust_df <- robust_df[(m==n.annotations) & (N==n.classifications)]
## map raw method codes to display labels, then drop the feasible fit
robust_df <- robust_df[,method := new.levels[method]]
robust_df <- robust_df[method != "Feasible"]
p <- .plot.simulation(robust_df, iv=iv, levels=c("True","Naïve","MI", "GMM", "MLE", "PL", "Feasible"))
p <- p + facet_wrap(B0~., ncol=3,as.table=F)
p <- p + scale_x_discrete(labels=label_wrap_gen(14)) + ylab("Estimate") + xlab("Method") + coord_flip()
p <- arrangeGrob(p,
top=grid.text("P(Y)",x=0.32,just='right'))
grid.draw(p)
}
## Robustness check 4 (IV perspective): estimates faceted by y_bias, the
## coefficient of Y on W (differential misclassification), largest first.
## BUG FIX: the original wrote `robust_df[,y_bias=factor(...)]`, which is
## not a data.table update — `=` makes it an unrecognized named argument to
## `[.data.table` and errors; the column assignment requires `:=`.
plot.robustness.4.iv <- function(iv, n.annotations=100, n.classifications=1000){
r <- readRDS('robustness_4.RDS')
robust_df <- data.table(r[['robustness_4']])
new.levels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","gmm"="GMM", "mle"="MLE", "zhang"="PL","feasible"="Feasible")
robust_df <- robust_df[(m==n.annotations) & (N==n.classifications)]
robust_df <- robust_df[,method := new.levels[method]]
robust_df <- robust_df[method != "Feasible"]
## order the facets from largest to smallest bias
robust_df <- robust_df[,y_bias:=factor(y_bias,levels=sort(unique(y_bias),decreasing=TRUE))]
p <- .plot.simulation(robust_df, iv=iv, levels=c("True","Naïve","MI", "GMM", "MLE", "PL", "Feasible"))
p <- p + facet_wrap(y_bias~., ncol=3,as.table=T)
p <- p + scale_x_discrete(labels=label_wrap_gen(14)) + ylab("Estimate") + xlab("Method") + coord_flip()
p <- arrangeGrob(p,
top=grid.text("Coefficient of Y for W",x=0.32,just='right'))
grid.draw(p)
}
## NOTE(review): this is a byte-for-byte duplicate definition of
## plot.robustness.4.iv; when sourced it silently overrides the identical
## definition above — consider deleting one copy.
## BUG FIX: `y_bias=factor(...)` inside `[ ]` is not a data.table update
## and errors as an unused argument; the assignment requires `:=`.
plot.robustness.4.iv <- function(iv, n.annotations=100, n.classifications=1000){
r <- readRDS('robustness_4.RDS')
robust_df <- data.table(r[['robustness_4']])
new.levels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","gmm"="GMM", "mle"="MLE", "zhang"="PL","feasible"="Feasible")
robust_df <- robust_df[(m==n.annotations) & (N==n.classifications)]
robust_df <- robust_df[,method := new.levels[method]]
robust_df <- robust_df[method != "Feasible"]
## order the facets from largest to smallest bias
robust_df <- robust_df[,y_bias:=factor(y_bias,levels=sort(unique(y_bias),decreasing=TRUE))]
p <- .plot.simulation(robust_df, iv=iv, levels=c("True","Naïve","MI", "GMM", "MLE", "PL", "Feasible"))
p <- p + facet_wrap(y_bias~., ncol=3,as.table=T)
p <- p + scale_x_discrete(labels=label_wrap_gen(14)) + ylab("Estimate") + xlab("Method") + coord_flip()
p <- arrangeGrob(p,
top=grid.text("Coefficient of Y for W",x=0.32,just='right'))
grid.draw(p)
}
## Robustness check 4 (DV perspective): estimates faceted by z_bias, the
## coefficient of Z on W, largest first.
## BUG FIXES: (1) `z_bias=factor(...)` is not a data.table update — the
## assignment requires `:=`; (2) sort() has no `descending` argument (the
## correct name is `decreasing`), so the original call would error.
## NOTE(review): this reads key 'robustness_4' from robustness_4_dv.RDS,
## while sibling DV functions read '<name>_dv' keys — confirm the key name.
plot.robustness.4.dv <- function(iv, n.annotations=100, n.classifications=1000){
r <- readRDS('robustness_4_dv.RDS')
robust_df <- data.table(r[['robustness_4']])
new.levels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","mle"="MLE", "zhang"="PL","feasible"="Feasible")
robust_df <- robust_df[(m==n.annotations) & (N==n.classifications)]
robust_df <- robust_df[,method := new.levels[method]]
robust_df <- robust_df[method != "Feasible"]
## order the facets from largest to smallest bias
robust_df <- robust_df[,z_bias:=factor(z_bias, levels=sort(unique(z_bias),decreasing=TRUE))]
p <- .plot.simulation(robust_df, iv=iv, levels=c("True","Naïve","MI", "GMM", "MLE", "PL", "Feasible"))
p <- p + facet_wrap(z_bias~., ncol=3,as.table=F)
p <- p + scale_x_discrete(labels=label_wrap_gen(14)) + ylab("Estimate") + xlab("Method") + coord_flip()
p <- arrangeGrob(p,
top=grid.text("Coefficient of Z on W",x=0.32,just='right'))
grid.draw(p)
}

55
resources/#variables.R# Normal file
View File

@@ -0,0 +1,55 @@
## variables.R: pull named quantities out of the simulation "remember" RDS
## files and bind them as top-level variables for the knitr document.
## NOTE(review): attach() is used so example.* / *_jobs names resolve from
## the RDS lists; this deliberately pollutes the search path for the
## lifetime of the document build.
library(knitr)
library(ggplot2)
library(data.table)
knitr::opts_chunk$set(fig.show='hold')
## integer formatter with thousands separators, e.g. 1234 -> "1,234"
f <- function (x) {formatC(x, format="d", big.mark=',')}
## proportion -> LaTeX percentage, e.g. 0.5 -> "50\%"
format.percent <- function(x) {x<-as.numeric(x);paste(f(x*100),"\\%",sep='')}
theme_set(theme_bw())
r <- readRDS('remembr.RDS')
attach(r)
r2 <- readRDS('remember_irr.RDS')
attach(r2)
r3 <- readRDS('remember_grid_sweep.RDS')
attach(r3)
## simulation.summary.df <- data.table(sample.4
## simulation.summary.df <- kable(simulation.summary.df,format='latex',row.names=T, column.names=c("Factors", "Input Parameters")
## median summary statistics per worked example (1a/1b = examples 1-2,
## 2a/2b = examples 3-4); parenthesized assignments also print when sourced
sim1a.cor.xz <- as.numeric(unlist(example.1['med.cor.xz']))
sim1a.acc <- unlist(example.1['med.accuracy'])
sim1b.acc <- unlist(example.2['med.accuracy'])
sim1b.acc.y1 <- unlist(example.2['med.accuracy.y1'])
sim1b.acc.y0 <- example.2['med.accuracy.y0']
(sim1b.fnr <- example.2['med.fnr'])
(sim1b.fnr.y0 <- example.2['med.fnr.y0'])
(sim1b.fnr.y1 <- example.2['med.fnr.y1'])
sim1b.fpr <- example.2['med.fpr']
sim1b.fpr.y0 <- example.2['med.fpr.y0']
sim1b.fpr.y1 <- example.2['med.fpr.y1']
sim1b.cor.resid.w_pred <- as.numeric(unlist(example.2['cor.resid.w_pred']))
(sim1b.cor.xz <- example.2['med.cor.xz'])
sim2a.AC.acc <- example.3['med.accuracy']
sim2a.lik.ratio <- example.3['med.lik.ratio']
sim2a.cor.xz <- as.numeric(example.3['med.cor.xz'])
sim2b.AC.acc <- example.4['med.accuracy']
sim2b.lik.ratio <- example.4['med.lik.ratio']
(sim2b.error.cor.x <- as.numeric(unlist(example.4['med.error.cor.x'])))
(sim2b.error.cor.z <- as.numeric(unlist(example.4['med.error.cor.z'])))
n.simulations <- max(unlist(example_1_jobs$seed))
## NOTE(review): this overwrites the sim1a.cor.xz computed above from
## example.1 with example.3's value — confirm which source is intended
sim1a.cor.xz <- as.numeric(unlist(example.3['med.cor.xz']))
sim1.R2 <- unlist(example_1_jobs$y_explained_variance)
N.sizes <- unlist(example_1_jobs$N)
N.sizes <- N.sizes[N.sizes!=800]
m.sizes <- unlist(example_1_jobs$m)
sim2.Bx <- as.numeric(example_4_jobs$Bxy)
sim2.Bz <- as.numeric(example_4_jobs$Bzy)
sim1.z.sd <- 0.5
irr.coder.accuracy <- unlist(example_5_jobs$coder_accuracy)
med.loco.accuracy <- unlist(example.5$med.loco.acc)

82
resources/functions.R Normal file
View File

@@ -0,0 +1,82 @@
library(grid)
library(gridExtra)
library(gtable)
## Render a proportion as a LaTeX-escaped percentage string.
## x: numeric proportion(s); digits: decimal places kept after scaling.
## e.g. format.percent(0.5) -> "50\%"
format.percent <- function(x,digits=1){
scaled <- round(100 * x, digits)
paste(scaled, "\\%", sep='')
}
## Format a number as an integer string with thousands separators,
## e.g. 1234567 -> "1,234,567".
f <- function (x) {
formatC(x, format="d", big.mark=',')
}
## Assemble the standard simulation figure: the core pointrange plot from
## .plot.simulation, faceted in a grid (facet_lhs ~ facet_rhs, default
## m ~ N) and wrapped with margin titles naming the design dimensions.
## Returns a gtable grob; callers draw it with grid.draw().
plot.simulation <- function(plot.df, iv='x', levels=c("true","naive", "amelia.full","mecor","gmm","mle", "zhang","feasible"),facet_lhs='m',facet_rhs='N'){
p <- .plot.simulation(plot.df, iv, levels)
p <- p + facet_grid(as.formula(paste(facet_lhs,'~',facet_rhs)),as.table=F)
p <- p + scale_x_discrete(labels=label_wrap_gen(14)) + ylab("Estimate") + xlab("Method") + coord_flip()
## margin titles: columns are classification counts, rows annotation counts
p <- arrangeGrob(p,
top=grid.text("No. classifications",x=0.32,just='right'),
right=grid.text("No. annotations",y=0.345,just='right',rot=270))
return(p)
}
## Core pointrange plot shared by all simulation figures: one point-range
## per method (columns est.lower.95 / est.upper.95) plus a nudged grey
## linerange for the mean CI, with a dashed reference line at the mean
## 'True' estimate for the chosen iv.
## NOTE(review): the reference line filters method=='True' while the
## default `levels` use lowercase 'true' — with the defaults the mean is
## taken over zero rows (NaN); callers appear to pass relabelled data.
.plot.simulation <- function(plot.df, iv='x', levels=c("true","naive", "amelia.full","mecor","gmm","mle", "zhang","feasible")){
plot.df <- copy(plot.df)  # avoid mutating the caller's data.table by reference
plot.df <- plot.df[,':='(method=factor(method,levels=levels,ordered=T),
N=factor(N),
m=factor(m))]
plot.df <- plot.df[,method.old:=method]
true.est <- mean(plot.df[(method=='True') & (variable==iv)]$mean.est)
plot.df <- plot.df[(variable==iv)&(method !="True")]
p <- ggplot(plot.df, aes(y=mean.est, ymax=est.upper.95, ymin=est.lower.95, x=method))
p <- p + geom_hline(aes(yintercept=true.est),linetype=2)
p <- p + geom_pointrange(shape=1,size=0.5)
p <- p + geom_linerange(aes(ymax=mean.ci.upper, ymin=mean.ci.lower),position=position_nudge(x=0.4), color='grey40')
return(p)
}
## IV-perspective wrapper around plot.simulation: drops design cells not
## shown (N=8000, m in {200, 800}) and maps method codes to display labels.
## The subset on the first line already yields a fresh data.table, so the
## `:=` relabel below does not touch the caller's data.
plot.simulation.iv <- function(plot.df, iv='x'){
plot.df <- plot.df[(N!=8000) & (m!=800) & (m!=200)]
method.labels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","gmm"="GMM", "mle"="MLE", "zhang"="PL","feasible"="Feasible")
plot.df[,method := method.labels[method]]
plot.simulation(plot.df, iv, levels=c("True","Naïve","MI", "GMM", "MLE", "PL", "Feasible"))
}
## DV-perspective wrapper around plot.simulation: copies the input, drops
## design cells not shown (N=8000, m in {200, 800}), and maps method codes
## to display labels before delegating.
plot.simulation.dv <- function(plot.df, iv='x'){
plot.df <- copy(plot.df)
plot.df <- plot.df[(N!=8000) & (m!=800) & (m!=200)]
method.labels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","gmm"="GMM", "mle"="MLE", "zhang"="PL","feasible"="Feasible")
plot.df[,method:=method.labels[method]]
plot.simulation(plot.df, iv, levels=c("True","Naïve", "MI","MLE","PL","Feasible"))
}
## Inter-rater-reliability variant: relabels the one-coder / two-coder
## method codes and delegates to plot.simulation.
plot.simulation.irr <- function(plot.df,iv='x'){
plot.df <- copy(plot.df)
method.labels <- c('true'="True","loa0.feasible"="1 coder", "loco.feasible"="2 coders", "loa0.mle"="1 coder MLE", "loco.mle"="2 coders MLE", "amelia.full"="2 coders MI", "zhang"="2 coders PL", "gmm"="2 coders GMM")
plot.df[,method:=method.labels[method]]
plot.simulation(plot.df, iv, levels=c("True","1 coder", "2 coders", "1 coder MLE","2 coders MLE","2 coders MI","2 coders PL","2 coders GMM"))
}
## DV-perspective inter-rater-reliability variant: relabels the coder-based
## method codes and delegates to plot.simulation.
plot.simulation.irr.dv <- function(plot.df,iv='x'){
plot.df <- copy(plot.df)
method.labels <- c('true'="True","naive"="Naïve","loa0.feasible"="1 coder", "loco.feasible"="Feasible", "loa0.mle"="1 coder MLE", "loco.mle"="MLE", "amelia.full"="MI", "zhang"="PL", "gmm"="GMM")
plot.df[,method:=method.labels[method]]
plot.simulation(plot.df, iv, levels=c("True","Naïve", "Feasible", "MLE","MI","PL"))
}

View File

@@ -0,0 +1,111 @@
library(scales)
library(data.table)
library(ggplot2)
## Load the CivilComments worked-example results once at source time.
iv.example <- readRDS("iv_perspective_example.RDS")
dv.example <- readRDS("dv_perspective_example.RDS")
## annotation-sample proportion and count for each perspective
iv.sample.prop <- iv.example$cc_ex_tox.likes.race_disclosed.medsampsample.prop
dv.sample.prop <- dv.example$cc_ex_tox.likes.race_disclosed.largesampsample.prop
iv.sample.count <- iv.example$cc_ex_tox.likes.race_disclosed.medsampsample.count
dv.sample.count <- dv.example$cc_ex_tox.likes.race_disclosed.largesampsample.count
## Build the CivilComments coefficient-comparison figure: coefficients and
## 95% CIs for up to four models (classifier predictions, all annotations,
## the annotation sample, and the measurement-error correction), faceted by
## regression variable.
## datalist: result list keyed by '<name><part>_model' entries.
## name: key prefix selecting one analysis from datalist.
## varnames/varorder: optional display names / ordering for coefficients.
## include.models: subset of models to show. Returns a ggplot object.
plot.cc.example <- function(datalist, name, varnames=NULL, varorder=NULL, include.models=c("Automatic Classification", "All Annotations")){
model.names <- c("Automatic Classification", "All Annotations", "Annotation Sample", "Error Correction")
## keys of the three GLM coefficient vectors and the corrected-model params
glm.par.names <- paste0(name,"coef_",c("pred", "coder", "sample"), "_model")
measerr.par.name <- paste0(name,"measerr_model_par")
glm.pars <- datalist[glm.par.names]
n.pars <- length(glm.pars[[1]])
## the measurement-error model may carry extra params; keep the first n.pars
all.pars <- append(glm.pars, list("corrected"=datalist[[measerr.par.name]][1:n.pars]))
names(all.pars) <- model.names
df.pars <- as.data.table(data.frame(all.pars),keep.rownames=TRUE)
if(!is.null(varnames)){
df.pars[, rn := varnames]
}
setnames(df.pars, old="rn", new="variable")
## same assembly for the standard-error entries
glm.stderr.names <- paste0(name,"se_",c("pred", "coder", "sample"), "_model")
glm.stderr <- datalist[glm.stderr.names]
measerr.stderr.name <- paste0(name,"measerr_model_stderr")
all.stderr <- append(glm.stderr, list("corrected"=datalist[[measerr.stderr.name]][1:n.pars]))
names(all.stderr) <- model.names
df.stderr <- as.data.table(data.frame(all.stderr), keep.rownames=TRUE)
if(!is.null(varnames)){
df.stderr[, rn := varnames]
}
setnames(df.stderr, old="rn", new="variable")
## long format: one row per (variable, model) with Estimate and StdErr
df.pars <- melt(df.pars, id.vars = "variable", variable.name = "Model", value.name = "Estimate")
df.stderr <- melt(df.stderr, id.vars = "variable",variable.name = "Model", value.name = "StdErr")
df <- df.pars[df.stderr, on = c("variable", "Model")]
## NOTE(review): sqrt(StdErr) implies the "se_" entries actually hold
## variances, not standard errors — TODO confirm against the producer code
df[,":="(UpperCI = Estimate + 1.96*sqrt(StdErr),
LowerCI = Estimate - 1.96*sqrt(StdErr))]
if(!is.null(varorder)){
df[,variable:=factor(variable,levels=varorder)]
}
df[,Model:= factor(gsub('\\.',' ', Model), levels=rev(model.names))]
df <- df[Model %in% include.models]
## NOTE(review): ymax is mapped to LowerCI and ymin to UpperCI — the
## aesthetics look swapped relative to the columns; confirm intent
p <- ggplot(df[variable != "Intercept"], aes(y = Estimate, x=Model, ymax=LowerCI, ymin=UpperCI, group=variable))
p <- p + geom_pointrange(shape=1) + facet_wrap('variable',scales='free_x',nrow=1,as.table=F) + geom_hline(aes(yintercept=0),linetype='dashed',color='gray40') + coord_flip() + xlab("")
p <- p + scale_y_continuous(breaks=breaks_extended(4))
return(p)
}
## DV-perspective CivilComments example: logistic regression predicting
## toxicity, via plot.cc.example.
## NOTE(review): this passes dv.example with the "...medsamp" key prefix,
## while the module-level dv extractions above use "...largesamp" —
## confirm the intended sample prefix.
plot.civilcomments.dv.example <- function(include.models=c("Automatic Classification", "All Annotations")){
return(plot.cc.example(dv.example, "cc_ex_tox.likes.race_disclosed.medsamp", varnames=c("Intercept", "Likes", "Identity Disclosure", "Likes:Identity Disclosure"),varorder=c("Intercept", "Likes", "Identity Disclosure", "Likes:Identity Disclosure"), include.models=include.models) + ylab("Coefficients and 95% Confidence Intervals") + ggtitle("Logistic Regression Predicting Toxicity"))
}
## IV-perspective CivilComments example: logistic regression predicting
## racial/ethnic identity disclosure, via plot.cc.example.
plot.civilcomments.iv.example <- function(include.models=c("Automatic Classification", "All Annotations")){
base.plot <- plot.cc.example(iv.example, "cc_ex_tox.likes.race_disclosed.medsamp", varnames=c("Intercept", "Likes", "Likes:Toxicity", "Toxicity"),varorder=c("Intercept", "Likes", "Toxicity", "Likes:Toxicity"), include.models=include.models)
base.plot + ylab("Coefficients and 95% Confidence Intervals") + ggtitle("Logistic Regression Predicting Racial/Ethnic Identity Disclosure")
}
## Alternative IV example plot assembled directly from the result list.
## BUG FIX: the second rownames() call re-assigned df.pars instead of
## df.stderr, leaving the standard-error matrix unlabelled; it now labels
## df.stderr as intended. Also: the attach() is now paired with an on.exit
## detach so the search path is not permanently polluted.
## NOTE(review): the final call passes (df.pars, ci.lower, ci.upper) to
## plot.cc.example, whose signature is (datalist, name, ...) — this looks
## inconsistent; confirm which plotting helper was intended.
plot.civilcomments.iv.example.2 <- function(){
attach(iv.example)
on.exit(detach('iv.example'), add=TRUE)
df.pars <- rbind(cc_ex_tox.likes.race_disclosedcoef_pred_model,
cc_ex_tox.likes.race_disclosedcoef_coder_model,
cc_ex_tox.likes.race_disclosedcoef_sample_model,
cc_ex_tox.likes.race_disclosedmeaserr_model_par[1:3]
)
rownames(df.pars) <- c('predictions', 'coders', 'sample', 'corrected')
df.stderr <- rbind(cc_ex_tox.likes.race_disclosedse_pred_model,
cc_ex_tox.likes.race_disclosedse_coder_model,
cc_ex_tox.likes.race_disclosedse_sample_model,
cc_ex_tox.likes.race_disclosedmeaserr_model_stderr[1:3]
)
rownames(df.stderr) <- c('predictions', 'coders', 'sample', 'corrected')
ci.upper <- df.pars + 1.96 * sqrt(df.stderr)
ci.lower <- df.pars - 1.96 * sqrt(df.stderr)
return(plot.cc.example(df.pars, ci.lower, ci.upper))
}

View File

@@ -0,0 +1,268 @@
library(data.table)
library(ggplot2)
source('resources/functions.R')
## Robustness check 1 (IV perspective): compare the MLE fit with no Z in
## the error model against the baseline example-2 results; draws the figure.
plot.robustness.1 <- function(iv='x'){
## robustness check 1 test g
r <- readRDS('robustness_1.RDS')
baseline_df <- readRDS('remembr.RDS')[['plot.df.example.2']]
robust_df <- data.table(r$robustness_1)
## just compare the mle methods in the two examples
## keep only runs where Z has nonzero effects on both Y and X
robust_df <- robust_df[Bzy!=0]
robust_df <- robust_df[Bzx!=0]
baseline_df[method=='true', method:='True']
robust_df[method=='true', method:='True']
baseline_df <- baseline_df[(method=='mle') | (method=='True') | (method=='naive')]
robust_df <- robust_df[(method=='mle') | (method=='True')]
baseline_df[method=='mle',method:='MLE Reported']
robust_df[method=='mle',method:='No Z in Error Model']
df <- rbind(baseline_df, robust_df, fill=TRUE)
df[method=='naive', method:='Naive']
df <- df[(N %in% c(1000,5000)) & (m %in% c(200,100))]
p <- plot.simulation(df,iv=iv,levels=c('MLE Reported','No Z in Error Model', 'Naive', 'True'))
grid.draw(p)
}
## Robustness check 1, assumption-holds variant: restricted to runs where
## Bzy == 0, i.e. the omitted Z genuinely has no effect on Y.
plot.robustness.1.checkassumption <- function(iv='x'){
## robustness check 1 test g
r <- readRDS('robustness_1.RDS')
baseline_df <- readRDS('remembr.RDS')[['plot.df.example.2']]
robust_df <- data.table(r$robustness_1)
## just compare the mle methods in the two examples
robust_df <- robust_df[Bzy==0]
robust_df <- robust_df[Bzx!=0]
baseline_df[method=='true', method:='True']
robust_df[method=='true', method:='True']
## the True reference comes from robust_df only in this variant
baseline_df <- baseline_df[(method=='mle') | (method=='naive')]
robust_df <- robust_df[(method=='mle') | (method=='True')]
baseline_df[method=='mle',method:='MLE Reported']
robust_df[method=='mle',method:='No Z in Error Model']
df <- rbind(baseline_df, robust_df, fill=TRUE)
df[method=='naive', method:='Naive']
df <- df[(N %in% c(1000,5000)) & (m %in% c(200,100))]
p <- plot.simulation(df,iv=iv,levels=c('MLE Reported','No Z in Error Model', 'Naive', 'True'))
grid.draw(p)
}
## Robustness check 1 (DV perspective): compare the DV-side MLE with no Z
## in the error model against the baseline example-4 results.
plot.robustness.1.dv <- function(iv='z'){
## robustness check 1 test g
r <- readRDS('robustness_1_dv.RDS')
baseline_df <- readRDS('remembr.RDS')[['plot.df.example.4']]
robust_df <- data.table(r$robustness_1_dv)
## just compare the mle methods in the two examples
baseline_df[method=='true', method:='True']
robust_df[method=='true', method:='True']
## keep only runs with nonzero effects of X and Z on Y
robust_df <- robust_df[Bxy!=0]
robust_df <- robust_df[Bzy!=0]
# robust_df <- robust_df[Bzx==-0.1]
baseline_df <- baseline_df[(method=='mle') | (method=='True') | (method=='naive')]
robust_df <- robust_df[(method=='mle') | (method=='True')]
baseline_df[method=='mle',method:='MLE Reported']
robust_df[method=='mle',method:='No Z in Error Model']
df <- rbind(baseline_df, robust_df, fill=TRUE)
df <- df[(N %in% c(1000,5000)) & (m %in% c(200,100))]
df[method=='naive', method:='Naive']
p <- plot.simulation(df,iv=iv,levels=c('MLE Reported','No Z in Error Model','Naive', 'True'))
grid.draw(p)
}
## Robustness check 2 (IV perspective): estimates faceted by automatic
## classifier accuracy for one (m, N) design cell; draws the figure.
plot.robustness.2.iv <- function(iv, n.annotations=100, n.classifications=5000){
r <- readRDS("robustness_2.RDS")
robust_df <- data.table(r[['robustness_2']])
robust_df <- robust_df[(m==n.annotations) & (N==n.classifications)]
## map raw method codes to display labels, then drop the feasible fit
new.levels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","gmm"="GMM", "mle"="MLE", "zhang"="PL","feasible"="Feasible")
robust_df <- robust_df[,method := new.levels[method]]
robust_df <- robust_df[method != "Feasible"]
p <- .plot.simulation(robust_df, iv=iv, levels=c("True","Naïve","MI", "GMM", "MLE", "PL", "Feasible"))
p <- p + facet_wrap(prediction_accuracy~., ncol=4,as.table=F)
p <- p + scale_x_discrete(labels=label_wrap_gen(14)) + ylab("Estimate") + xlab("Method") + coord_flip()
p <- arrangeGrob(p,
top=grid.text("AC Accuracy",x=0.32,just='right'))
grid.draw(p)
}
## Module-level summary of the robustness-2 DV results: the range of
## classifier accuracies swept in the simulation, computed once at load time.
robust2 <- readRDS("robustness_2_dv.RDS")
robust_2_df <- data.table(robust2[['robustness_2_dv']])
robust_2_min_acc <- min(robust_2_df[,prediction_accuracy])
robust_2_max_acc <- max(robust_2_df[,prediction_accuracy])
## Robustness check 2 (DV perspective): estimates faceted by automatic
## classifier accuracy for one (m, N) design cell; draws the figure.
plot.robustness.2.dv <- function(iv, n.annotations=100, n.classifications=5000){
r <- readRDS("robustness_2_dv.RDS")
robust_df <- data.table(r[['robustness_2_dv']])
#temporary work around a bug in the makefile
## if('Px' %in% names(robust_df))
## robust_df <- robust_df[is.na(Px)]
robust_df <- robust_df[(m==n.annotations) & (N==n.classifications)]
## map raw method codes to display labels, then drop the feasible fit
new.levels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","gmm"="GMM", "mle"="MLE", "zhang"="PL","feasible"="Feasible")
robust_df <- robust_df[,method := new.levels[method]]
robust_df <- robust_df[method != "Feasible"]
p <- .plot.simulation(robust_df, iv=iv, levels=c("True","Naïve","MI", "GMM", "MLE", "PL", "Feasible"))
p <- p + facet_wrap(prediction_accuracy~., ncol=4,as.table=F)
p <- p + scale_x_discrete(labels=label_wrap_gen(14)) + ylab("Estimate") + xlab("Method") + coord_flip()
p <- arrangeGrob(p,
top=grid.text("AC Accuracy",x=0.32,just='right'))
grid.draw(p)
}
## Robustness check 3 (IV perspective): compare the Fischer-information MLE
## approximation against the profile-likelihood MLE, faceted by P(X).
## BUG FIXES: (1) the profile-likelihood frame was renamed from 'MLE'
## BEFORE new.levels mapped 'mle' -> 'MLE', so the rename matched nothing;
## the mapping now runs first. (2) the approximation frame was relabelled
## 'Fischer likelihood', a label absent from the levels passed to
## .plot.simulation; it now uses 'Fischer approximation' to match.
plot.robustness.3.iv <- function(iv, n.annotations=100, n.classifications=5000){
r <- readRDS('robustness_3.RDS')
robust_df <- data.table(r[['robustness_3']])
r2 <- readRDS('robustness_3_proflik.RDS')
robust_df_proflik <- data.table(r2[['robustness_3_proflik']])
new.levels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","gmm"="GMM", "mle"="MLE", "zhang"="PL","feasible"="Feasible")
robust_df <- robust_df[(m==n.annotations) & (N==n.classifications)]
robust_df <- robust_df[,method := new.levels[method]]
robust_df <- robust_df[method != "Feasible"]
robust_df <- robust_df[method=='MLE',method:='Fischer approximation']
robust_df_proflik <- robust_df_proflik[(m==n.annotations) & (N==n.classifications)]
robust_df_proflik <- robust_df_proflik[,method := new.levels[method]]
robust_df_proflik <- robust_df_proflik[method=='MLE',method:='Profile likelihood']
robust_df_proflik <- robust_df_proflik[method != "Feasible"]
df <- rbind(robust_df, robust_df_proflik)
p <- .plot.simulation(df, iv=iv, levels=c("True","Naïve","MI", "GMM", "Profile likelihood","Fischer approximation", "PL", "Feasible"))
p <- p + facet_wrap(Px~., ncol=3,as.table=F)
p <- p + scale_x_discrete(labels=label_wrap_gen(14)) + ylab("Estimate") + xlab("Method") + coord_flip()
p <- arrangeGrob(p,
top=grid.text("P(X)",x=0.32,just='right'))
grid.draw(p)
}
## Robustness check 3 (DV perspective): estimates faceted by B0 (P(Y)).
## NOTE(review): new.levels here has no "gmm" entry, yet the levels passed
## to .plot.simulation include "GMM" — confirm whether GMM rows exist.
plot.robustness.3.dv <- function(iv, n.annotations=100, n.classifications=1000){
r <- readRDS('robustness_3_dv.RDS')
robust_df <- data.table(r[['robustness_3_dv']])
new.levels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","mle"="MLE", "zhang"="PL","feasible"="Feasible")
robust_df <- robust_df[(m==n.annotations) & (N==n.classifications)]
## map raw method codes to display labels, then drop the feasible fit
robust_df <- robust_df[,method := new.levels[method]]
robust_df <- robust_df[method != "Feasible"]
p <- .plot.simulation(robust_df, iv=iv, levels=c("True","Naïve","MI", "GMM", "MLE", "PL", "Feasible"))
p <- p + facet_wrap(B0~., ncol=3,as.table=F)
p <- p + scale_x_discrete(labels=label_wrap_gen(14)) + ylab("Estimate") + xlab("Method") + coord_flip()
p <- arrangeGrob(p,
top=grid.text("P(Y)",x=0.32,just='right'))
grid.draw(p)
}
## Robustness check 4 (IV perspective): estimates faceted by y_bias, the
## coefficient of Y on W, largest first.
## BUG FIX: `robust_df[,y_bias=factor(...)]` is not a data.table update —
## `=` makes it an unrecognized named argument and errors; `:=` is required.
plot.robustness.4.iv <- function(iv, n.annotations=100, n.classifications=1000){
r <- readRDS('robustness_4.RDS')
robust_df <- data.table(r[['robustness_4']])
new.levels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","gmm"="GMM", "mle"="MLE", "zhang"="PL","feasible"="Feasible")
robust_df <- robust_df[(m==n.annotations) & (N==n.classifications)]
robust_df <- robust_df[,method := new.levels[method]]
robust_df <- robust_df[method != "Feasible"]
## order the facets from largest to smallest bias
robust_df <- robust_df[,y_bias:=factor(y_bias,levels=sort(unique(y_bias),decreasing=TRUE))]
p <- .plot.simulation(robust_df, iv=iv, levels=c("True","Naïve","MI", "GMM", "MLE", "PL", "Feasible"))
p <- p + facet_wrap(y_bias~., ncol=3,as.table=T)
p <- p + scale_x_discrete(labels=label_wrap_gen(14)) + ylab("Estimate") + xlab("Method") + coord_flip()
p <- arrangeGrob(p,
top=grid.text("Coefficient of Y for W",x=0.32,just='right'))
grid.draw(p)
}
## NOTE(review): byte-for-byte duplicate definition of plot.robustness.4.iv;
## sourcing silently overrides the identical definition above — consider
## deleting one copy.
## BUG FIX: `y_bias=factor(...)` inside `[ ]` is not a data.table update
## and errors as an unused argument; the assignment requires `:=`.
plot.robustness.4.iv <- function(iv, n.annotations=100, n.classifications=1000){
r <- readRDS('robustness_4.RDS')
robust_df <- data.table(r[['robustness_4']])
new.levels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","gmm"="GMM", "mle"="MLE", "zhang"="PL","feasible"="Feasible")
robust_df <- robust_df[(m==n.annotations) & (N==n.classifications)]
robust_df <- robust_df[,method := new.levels[method]]
robust_df <- robust_df[method != "Feasible"]
## order the facets from largest to smallest bias
robust_df <- robust_df[,y_bias:=factor(y_bias,levels=sort(unique(y_bias),decreasing=TRUE))]
p <- .plot.simulation(robust_df, iv=iv, levels=c("True","Naïve","MI", "GMM", "MLE", "PL", "Feasible"))
p <- p + facet_wrap(y_bias~., ncol=3,as.table=T)
p <- p + scale_x_discrete(labels=label_wrap_gen(14)) + ylab("Estimate") + xlab("Method") + coord_flip()
p <- arrangeGrob(p,
top=grid.text("Coefficient of Y for W",x=0.32,just='right'))
grid.draw(p)
}
## Robustness check 4 (DV perspective): estimates faceted by z_bias, the
## coefficient of Z on W, largest first.
## BUG FIXES: (1) `z_bias=factor(...)` is not a data.table update — the
## assignment requires `:=`; (2) sort() has no `descending` argument (the
## correct name is `decreasing`), so the original call would error.
## NOTE(review): this reads key 'robustness_4' from robustness_4_dv.RDS;
## sibling DV functions read '<name>_dv' keys — confirm the key name.
plot.robustness.4.dv <- function(iv, n.annotations=100, n.classifications=1000){
r <- readRDS('robustness_4_dv.RDS')
robust_df <- data.table(r[['robustness_4']])
new.levels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","mle"="MLE", "zhang"="PL","feasible"="Feasible")
robust_df <- robust_df[(m==n.annotations) & (N==n.classifications)]
robust_df <- robust_df[,method := new.levels[method]]
robust_df <- robust_df[method != "Feasible"]
## order the facets from largest to smallest bias
robust_df <- robust_df[,z_bias:=factor(z_bias, levels=sort(unique(z_bias),decreasing=TRUE))]
p <- .plot.simulation(robust_df, iv=iv, levels=c("True","Naïve","MI", "GMM", "MLE", "PL", "Feasible"))
p <- p + facet_wrap(z_bias~., ncol=3,as.table=F)
p <- p + scale_x_discrete(labels=label_wrap_gen(14)) + ylab("Estimate") + xlab("Method") + coord_flip()
p <- arrangeGrob(p,
top=grid.text("Coefficient of Z on W",x=0.32,just='right'))
grid.draw(p)
}

55
resources/variables.R Normal file
View File

@@ -0,0 +1,55 @@
## variables.R: pull named quantities out of the simulation "remember" RDS
## files and bind them as top-level variables for the knitr document.
## NOTE(review): attach() deliberately pollutes the search path so the
## example.* / *_jobs names resolve from the RDS lists.
library(knitr)
library(ggplot2)
library(data.table)
knitr::opts_chunk$set(fig.show='hold')
## integer formatter with thousands separators, e.g. 1234 -> "1,234"
f <- function (x) {formatC(x, format="d", big.mark=',')}
## proportion -> LaTeX percentage, e.g. 0.5 -> "50\%"
format.percent <- function(x) {x<-as.numeric(x);paste(f(x*100),"\\%",sep='')}
theme_set(theme_bw())
r <- readRDS('remembr.RDS')
attach(r)
r2 <- readRDS('remember_irr.RDS')
attach(r2)
r3 <- readRDS('remember_grid_sweep.RDS')
attach(r3)
## simulation.summary.df <- data.table(sample.4
## simulation.summary.df <- kable(simulation.summary.df,format='latex',row.names=T, column.names=c("Factors", "Input Parameters")
## median summary statistics per worked example; parenthesized
## assignments also print their value when sourced
sim1a.cor.xz <- as.numeric(unlist(example.1['med.cor.xz']))
sim1a.acc <- unlist(example.1['med.accuracy'])
sim1b.acc <- unlist(example.2['med.accuracy'])
sim1b.acc.y1 <- unlist(example.2['med.accuracy.y1'])
sim1b.acc.y0 <- example.2['med.accuracy.y0']
(sim1b.fnr <- example.2['med.fnr'])
(sim1b.fnr.y0 <- example.2['med.fnr.y0'])
(sim1b.fnr.y1 <- example.2['med.fnr.y1'])
sim1b.fpr <- example.2['med.fpr']
sim1b.fpr.y0 <- example.2['med.fpr.y0']
sim1b.fpr.y1 <- example.2['med.fpr.y1']
sim1b.cor.resid.w_pred <- as.numeric(unlist(example.2['cor.resid.w_pred']))
(sim1b.cor.xz <- example.2['med.cor.xz'])
sim2a.AC.acc <- example.3['med.accuracy']
sim2a.lik.ratio <- example.3['med.lik.ratio']
sim2a.cor.xz <- as.numeric(example.3['med.cor.xz'])
sim2b.AC.acc <- example.4['med.accuracy']
sim2b.lik.ratio <- example.4['med.lik.ratio']
(sim2b.error.cor.x <- as.numeric(unlist(example.4['med.error.cor.x'])))
(sim2b.error.cor.z <- as.numeric(unlist(example.4['med.error.cor.z'])))
n.simulations <- max(unlist(example_1_jobs$seed))
## NOTE(review): this overwrites the sim1a.cor.xz computed above from
## example.1 with example.3's value — confirm which source is intended
sim1a.cor.xz <- as.numeric(unlist(example.3['med.cor.xz']))
sim1.R2 <- unlist(example_1_jobs$y_explained_variance)
N.sizes <- unlist(example_1_jobs$N)
N.sizes <- N.sizes[N.sizes!=800]
m.sizes <- unlist(example_1_jobs$m)
sim2.Bx <- as.numeric(example_4_jobs$Bxy)
sim2.Bz <- as.numeric(example_4_jobs$Bzy)
sim1.z.sd <- 0.5
irr.coder.accuracy <- unlist(example_5_jobs$coder_accuracy)
med.loco.accuracy <- unlist(example.5$med.loco.acc)