1
0

Update on Overleaf.

This commit is contained in:
2023-02-22 19:00:30 +00:00
committed by node
commit c5e0a01713
133 changed files with 24196 additions and 0 deletions

View File

@@ -0,0 +1,266 @@
library(data.table)
library(ggplot2)
source('resources/functions.R')
## Robustness check 1 (IV perspective): compare the MLE fit with no Z in
## the error model against the baseline example-2 results.
## iv: independent variable whose estimates are plotted.
## Side effect: draws the assembled grob on the current graphics device.
plot.robustness.1 <- function(iv='x'){
## robustness check 1 test g
r <- readRDS('robustness_1.RDS')
baseline_df <- readRDS('remembr.RDS')[['plot.df.example.2']]
robust_df <- data.table(r$robustness_1)
## just compare the mle methods in the two examples
## keep only runs where Z has nonzero effects on both Y and X
robust_df <- robust_df[Bzy!=0]
robust_df <- robust_df[Bzx!=0]
## normalize the 'true' label so both frames agree
baseline_df[method=='true', method:='True']
robust_df[method=='true', method:='True']
baseline_df <- baseline_df[(method=='mle') | (method=='True') | (method=='naive')]
robust_df <- robust_df[(method=='mle') | (method=='True')]
## relabel the two MLE variants so they are distinguishable in the legend
baseline_df[method=='mle',method:='MLE Reported']
robust_df[method=='mle',method:='No Z in Error Model']
df <- rbind(baseline_df, robust_df, fill=TRUE)
df[method=='naive', method:='Naive']
## restrict to a subset of sample-size design cells
df <- df[(N %in% c(1000,5000)) & (m %in% c(200,100))]
p <- plot.simulation(df,iv=iv,levels=c('MLE Reported','No Z in Error Model', 'Naive', 'True'))
grid.draw(p)
}
## Robustness check 1, assumption-holds variant: same comparison as
## plot.robustness.1 but restricted to runs where Bzy == 0, i.e. where the
## omitted Z genuinely has no effect on Y.
## Side effect: draws the assembled grob on the current graphics device.
plot.robustness.1.checkassumption <- function(iv='x'){
## robustness check 1 test g
r <- readRDS('robustness_1.RDS')
baseline_df <- readRDS('remembr.RDS')[['plot.df.example.2']]
robust_df <- data.table(r$robustness_1)
## just compare the mle methods in the two examples
## Bzy==0: the no-Z-effect assumption holds in these runs
robust_df <- robust_df[Bzy==0]
robust_df <- robust_df[Bzx!=0]
baseline_df[method=='true', method:='True']
robust_df[method=='true', method:='True']
## note: unlike plot.robustness.1, the baseline keeps no 'True' rows here;
## the True reference comes from robust_df only
baseline_df <- baseline_df[(method=='mle') | (method=='naive')]
robust_df <- robust_df[(method=='mle') | (method=='True')]
baseline_df[method=='mle',method:='MLE Reported']
robust_df[method=='mle',method:='No Z in Error Model']
df <- rbind(baseline_df, robust_df, fill=TRUE)
df[method=='naive', method:='Naive']
df <- df[(N %in% c(1000,5000)) & (m %in% c(200,100))]
p <- plot.simulation(df,iv=iv,levels=c('MLE Reported','No Z in Error Model', 'Naive', 'True'))
grid.draw(p)
}
## Robustness check 1 (DV perspective): compare the DV-side MLE with no Z
## in the error model against the baseline example-4 results.
## iv: variable whose estimates are plotted (defaults to 'z' here).
## Side effect: draws the assembled grob on the current graphics device.
plot.robustness.1.dv <- function(iv='z'){
## robustness check 1 test g
r <- readRDS('robustness_1_dv.RDS')
baseline_df <- readRDS('remembr.RDS')[['plot.df.example.4']]
robust_df <- data.table(r$robustness_1_dv)
## just compare the mle methods in the two examples
baseline_df[method=='true', method:='True']
robust_df[method=='true', method:='True']
## keep only runs with nonzero effects of X and Z on Y
robust_df <- robust_df[Bxy!=0]
robust_df <- robust_df[Bzy!=0]
# robust_df <- robust_df[Bzx==-0.1]
baseline_df <- baseline_df[(method=='mle') | (method=='True') | (method=='naive')]
robust_df <- robust_df[(method=='mle') | (method=='True')]
baseline_df[method=='mle',method:='MLE Reported']
robust_df[method=='mle',method:='No Z in Error Model']
df <- rbind(baseline_df, robust_df, fill=TRUE)
df <- df[(N %in% c(1000,5000)) & (m %in% c(200,100))]
df[method=='naive', method:='Naive']
p <- plot.simulation(df,iv=iv,levels=c('MLE Reported','No Z in Error Model','Naive', 'True'))
grid.draw(p)
}
## Robustness check 2 (IV perspective): estimates faceted by the accuracy
## of the automatic classifier, restricted to one (m, N) design cell.
## Side effect: draws the assembled grob on the current graphics device.
plot.robustness.2.iv <- function(iv, n.annotations=100, n.classifications=5000){
result <- readRDS("robustness_2.RDS")
sim_df <- data.table(result[['robustness_2']])
sim_df <- sim_df[(m == n.annotations) & (N == n.classifications)]
## map raw method codes to display labels, then drop the feasible fit
method.labels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","gmm"="GMM", "mle"="MLE", "zhang"="PL","feasible"="Feasible")
sim_df[, method := method.labels[method]]
sim_df <- sim_df[method != "Feasible"]
plt <- .plot.simulation(sim_df, iv=iv, levels=c("True","Naïve","MI", "GMM", "MLE", "PL", "Feasible"))
plt <- plt + facet_wrap(prediction_accuracy~., ncol=4,as.table=F)
plt <- plt + coord_flip() + scale_x_discrete(labels=label_wrap_gen(14)) + ylab("Estimate") + xlab("Method")
plt <- arrangeGrob(plt, top=grid.text("AC Accuracy",x=0.32,just='right'))
grid.draw(plt)
}
## Module-level summary of the robustness-2 DV results: the range of
## classifier accuracies swept in the simulation, computed once at load time.
robust2 <- readRDS("robustness_2_dv.RDS")
robust_2_df <- data.table(robust2[['robustness_2_dv']])
robust_2_min_acc <- min(robust_2_df[,prediction_accuracy])
robust_2_max_acc <- max(robust_2_df[,prediction_accuracy])
## Robustness check 2 (DV perspective): estimates faceted by the accuracy
## of the automatic classifier, restricted to one (m, N) design cell.
## Side effect: draws the assembled grob on the current graphics device.
plot.robustness.2.dv <- function(iv, n.annotations=100, n.classifications=5000){
result <- readRDS("robustness_2_dv.RDS")
sim_df <- data.table(result[['robustness_2_dv']])
## temporary workaround for a bug in the makefile (kept for reference):
## if('Px' %in% names(sim_df)) sim_df <- sim_df[is.na(Px)]
sim_df <- sim_df[(m == n.annotations) & (N == n.classifications)]
## map raw method codes to display labels, then drop the feasible fit
method.labels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","gmm"="GMM", "mle"="MLE", "zhang"="PL","feasible"="Feasible")
sim_df[, method := method.labels[method]]
sim_df <- sim_df[method != "Feasible"]
plt <- .plot.simulation(sim_df, iv=iv, levels=c("True","Naïve","MI", "GMM", "MLE", "PL", "Feasible"))
plt <- plt + facet_wrap(prediction_accuracy~., ncol=4,as.table=F)
plt <- plt + coord_flip() + scale_x_discrete(labels=label_wrap_gen(14)) + ylab("Estimate") + xlab("Method")
plt <- arrangeGrob(plt, top=grid.text("AC Accuracy",x=0.32,just='right'))
grid.draw(plt)
}
## Robustness check 3 (IV perspective): compare the Fischer-information MLE
## approximation against the profile-likelihood MLE, faceted by P(X).
## BUG FIX: the original ran `df <- df[(m==...) & (N==...)]` before `df`
## existed (it is only created by the rbind below), which errors at
## runtime; the (m, N) filter is now applied to robust_df instead, mirroring
## the filter already applied to robust_df_proflik.
## NOTE(review): 'Fischer' looks like a typo of 'Fisher' but is kept, since
## it must match the label in the levels passed to .plot.simulation.
plot.robustness.3.iv <- function(iv, n.annotations=100, n.classifications=5000){
r <- readRDS('robustness_3.RDS')
robust_df <- data.table(r[['robustness_3']])
r2 <- readRDS('robustness_3_proflik.RDS')
robust_df_proflik <- data.table(r2[['robustness_3_proflik']])
new.levels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","gmm"="GMM", "mle"="MLE", "zhang"="PL","feasible"="Feasible")
## restrict the approximation runs to the requested design cell
robust_df <- robust_df[(m==n.annotations) & (N==n.classifications)]
robust_df <- robust_df[,method := new.levels[method]]
robust_df <- robust_df[method != "Feasible"]
robust_df <- robust_df[method=='MLE',method:='Fischer approximation']
robust_df_proflik <- robust_df_proflik[(m==n.annotations) & (N==n.classifications)]
robust_df_proflik <- robust_df_proflik[,method := new.levels[method]]
robust_df_proflik <- robust_df_proflik[method=='MLE',method:='Profile likelihood']
robust_df_proflik <- robust_df_proflik[method != "Feasible"]
df <- rbind(robust_df, robust_df_proflik)
p <- .plot.simulation(df, iv=iv, levels=c("True","Naïve","MI", "GMM", "Profile likelihood","Fischer approximation", "PL", "Feasible"))
p <- p + facet_wrap(Px~., ncol=3,as.table=F)
p <- p + scale_x_discrete(labels=label_wrap_gen(14)) + ylab("Estimate") + xlab("Method") + coord_flip()
p <- arrangeGrob(p,
top=grid.text("P(X)",x=0.32,just='right'))
grid.draw(p)
}
## Robustness check 3 (DV perspective): estimates faceted by B0 (P(Y)).
## NOTE(review): new.levels here has no "gmm" entry, yet the levels passed
## to .plot.simulation include "GMM" — confirm whether GMM rows exist in
## this RDS; unmapped codes would become NA labels.
plot.robustness.3.dv <- function(iv, n.annotations=100, n.classifications=1000){
r <- readRDS('robustness_3_dv.RDS')
robust_df <- data.table(r[['robustness_3_dv']])
new.levels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","mle"="MLE", "zhang"="PL","feasible"="Feasible")
robust_df <- robust_df[(m==n.annotations) & (N==n.classifications)]
## map raw method codes to display labels, then drop the feasible fit
robust_df <- robust_df[,method := new.levels[method]]
robust_df <- robust_df[method != "Feasible"]
p <- .plot.simulation(robust_df, iv=iv, levels=c("True","Naïve","MI", "GMM", "MLE", "PL", "Feasible"))
p <- p + facet_wrap(B0~., ncol=3,as.table=F)
p <- p + scale_x_discrete(labels=label_wrap_gen(14)) + ylab("Estimate") + xlab("Method") + coord_flip()
p <- arrangeGrob(p,
top=grid.text("P(Y)",x=0.32,just='right'))
grid.draw(p)
}
## Robustness check 4 (IV perspective): estimates faceted by y_bias, the
## coefficient of Y on W (differential misclassification), largest first.
## BUG FIX: the original wrote `robust_df[,y_bias=factor(...)]`, which is
## not a data.table update — `=` makes it an unrecognized named argument to
## `[.data.table` and errors; the column assignment requires `:=`.
plot.robustness.4.iv <- function(iv, n.annotations=100, n.classifications=1000){
r <- readRDS('robustness_4.RDS')
robust_df <- data.table(r[['robustness_4']])
new.levels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","gmm"="GMM", "mle"="MLE", "zhang"="PL","feasible"="Feasible")
robust_df <- robust_df[(m==n.annotations) & (N==n.classifications)]
robust_df <- robust_df[,method := new.levels[method]]
robust_df <- robust_df[method != "Feasible"]
## order the facets from largest to smallest bias
robust_df <- robust_df[,y_bias:=factor(y_bias,levels=sort(unique(y_bias),decreasing=TRUE))]
p <- .plot.simulation(robust_df, iv=iv, levels=c("True","Naïve","MI", "GMM", "MLE", "PL", "Feasible"))
p <- p + facet_wrap(y_bias~., ncol=3,as.table=T)
p <- p + scale_x_discrete(labels=label_wrap_gen(14)) + ylab("Estimate") + xlab("Method") + coord_flip()
p <- arrangeGrob(p,
top=grid.text("Coefficient of Y for W",x=0.32,just='right'))
grid.draw(p)
}
## NOTE(review): this is a byte-for-byte duplicate definition of
## plot.robustness.4.iv; when sourced it silently overrides the identical
## definition above — consider deleting one copy.
## BUG FIX: `y_bias=factor(...)` inside `[ ]` is not a data.table update
## and errors as an unused argument; the assignment requires `:=`.
plot.robustness.4.iv <- function(iv, n.annotations=100, n.classifications=1000){
r <- readRDS('robustness_4.RDS')
robust_df <- data.table(r[['robustness_4']])
new.levels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","gmm"="GMM", "mle"="MLE", "zhang"="PL","feasible"="Feasible")
robust_df <- robust_df[(m==n.annotations) & (N==n.classifications)]
robust_df <- robust_df[,method := new.levels[method]]
robust_df <- robust_df[method != "Feasible"]
## order the facets from largest to smallest bias
robust_df <- robust_df[,y_bias:=factor(y_bias,levels=sort(unique(y_bias),decreasing=TRUE))]
p <- .plot.simulation(robust_df, iv=iv, levels=c("True","Naïve","MI", "GMM", "MLE", "PL", "Feasible"))
p <- p + facet_wrap(y_bias~., ncol=3,as.table=T)
p <- p + scale_x_discrete(labels=label_wrap_gen(14)) + ylab("Estimate") + xlab("Method") + coord_flip()
p <- arrangeGrob(p,
top=grid.text("Coefficient of Y for W",x=0.32,just='right'))
grid.draw(p)
}
## Robustness check 4 (DV perspective): estimates faceted by z_bias, the
## coefficient of Z on W, largest first.
## BUG FIXES: (1) `z_bias=factor(...)` is not a data.table update — the
## assignment requires `:=`; (2) sort() has no `descending` argument (the
## correct name is `decreasing`), so the original call would error.
## NOTE(review): this reads key 'robustness_4' from robustness_4_dv.RDS,
## while sibling DV functions read '<name>_dv' keys — confirm the key name.
plot.robustness.4.dv <- function(iv, n.annotations=100, n.classifications=1000){
r <- readRDS('robustness_4_dv.RDS')
robust_df <- data.table(r[['robustness_4']])
new.levels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","mle"="MLE", "zhang"="PL","feasible"="Feasible")
robust_df <- robust_df[(m==n.annotations) & (N==n.classifications)]
robust_df <- robust_df[,method := new.levels[method]]
robust_df <- robust_df[method != "Feasible"]
## order the facets from largest to smallest bias
robust_df <- robust_df[,z_bias:=factor(z_bias, levels=sort(unique(z_bias),decreasing=TRUE))]
p <- .plot.simulation(robust_df, iv=iv, levels=c("True","Naïve","MI", "GMM", "MLE", "PL", "Feasible"))
p <- p + facet_wrap(z_bias~., ncol=3,as.table=F)
p <- p + scale_x_discrete(labels=label_wrap_gen(14)) + ylab("Estimate") + xlab("Method") + coord_flip()
p <- arrangeGrob(p,
top=grid.text("Coefficient of Z on W",x=0.32,just='right'))
grid.draw(p)
}

55
resources/#variables.R# Normal file
View File

@@ -0,0 +1,55 @@
## variables.R: pull named quantities out of the simulation "remember" RDS
## files and bind them as top-level variables for the knitr document.
## NOTE(review): attach() is used so example.* / *_jobs names resolve from
## the RDS lists; this deliberately pollutes the search path for the
## lifetime of the document build.
library(knitr)
library(ggplot2)
library(data.table)
knitr::opts_chunk$set(fig.show='hold')
## integer formatter with thousands separators, e.g. 1234 -> "1,234"
f <- function (x) {formatC(x, format="d", big.mark=',')}
## proportion -> LaTeX percentage, e.g. 0.5 -> "50\%"
format.percent <- function(x) {x<-as.numeric(x);paste(f(x*100),"\\%",sep='')}
theme_set(theme_bw())
r <- readRDS('remembr.RDS')
attach(r)
r2 <- readRDS('remember_irr.RDS')
attach(r2)
r3 <- readRDS('remember_grid_sweep.RDS')
attach(r3)
## simulation.summary.df <- data.table(sample.4
## simulation.summary.df <- kable(simulation.summary.df,format='latex',row.names=T, column.names=c("Factors", "Input Parameters")
## median summary statistics per worked example (1a/1b = examples 1-2,
## 2a/2b = examples 3-4); parenthesized assignments also print when sourced
sim1a.cor.xz <- as.numeric(unlist(example.1['med.cor.xz']))
sim1a.acc <- unlist(example.1['med.accuracy'])
sim1b.acc <- unlist(example.2['med.accuracy'])
sim1b.acc.y1 <- unlist(example.2['med.accuracy.y1'])
sim1b.acc.y0 <- example.2['med.accuracy.y0']
(sim1b.fnr <- example.2['med.fnr'])
(sim1b.fnr.y0 <- example.2['med.fnr.y0'])
(sim1b.fnr.y1 <- example.2['med.fnr.y1'])
sim1b.fpr <- example.2['med.fpr']
sim1b.fpr.y0 <- example.2['med.fpr.y0']
sim1b.fpr.y1 <- example.2['med.fpr.y1']
sim1b.cor.resid.w_pred <- as.numeric(unlist(example.2['cor.resid.w_pred']))
(sim1b.cor.xz <- example.2['med.cor.xz'])
sim2a.AC.acc <- example.3['med.accuracy']
sim2a.lik.ratio <- example.3['med.lik.ratio']
sim2a.cor.xz <- as.numeric(example.3['med.cor.xz'])
sim2b.AC.acc <- example.4['med.accuracy']
sim2b.lik.ratio <- example.4['med.lik.ratio']
(sim2b.error.cor.x <- as.numeric(unlist(example.4['med.error.cor.x'])))
(sim2b.error.cor.z <- as.numeric(unlist(example.4['med.error.cor.z'])))
n.simulations <- max(unlist(example_1_jobs$seed))
## NOTE(review): this overwrites the sim1a.cor.xz computed above from
## example.1 with example.3's value — confirm which source is intended
sim1a.cor.xz <- as.numeric(unlist(example.3['med.cor.xz']))
sim1.R2 <- unlist(example_1_jobs$y_explained_variance)
N.sizes <- unlist(example_1_jobs$N)
N.sizes <- N.sizes[N.sizes!=800]
m.sizes <- unlist(example_1_jobs$m)
sim2.Bx <- as.numeric(example_4_jobs$Bxy)
sim2.Bz <- as.numeric(example_4_jobs$Bzy)
sim1.z.sd <- 0.5
irr.coder.accuracy <- unlist(example_5_jobs$coder_accuracy)
med.loco.accuracy <- unlist(example.5$med.loco.acc)

82
resources/functions.R Normal file
View File

@@ -0,0 +1,82 @@
library(grid)
library(gridExtra)
library(gtable)
## Render a proportion as a LaTeX-escaped percentage string.
## x: numeric proportion(s); digits: decimal places kept after scaling.
## e.g. format.percent(0.5) -> "50\%"
format.percent <- function(x,digits=1){
scaled <- round(100 * x, digits)
paste(scaled, "\\%", sep='')
}
## Format a number as an integer string with thousands separators,
## e.g. 1234567 -> "1,234,567".
f <- function (x) {
formatC(x, format="d", big.mark=',')
}
## Assemble the standard simulation figure: the core pointrange plot from
## .plot.simulation, faceted in a grid (facet_lhs ~ facet_rhs, default
## m ~ N) and wrapped with margin titles naming the design dimensions.
## Returns a gtable grob; callers draw it with grid.draw().
plot.simulation <- function(plot.df, iv='x', levels=c("true","naive", "amelia.full","mecor","gmm","mle", "zhang","feasible"),facet_lhs='m',facet_rhs='N'){
p <- .plot.simulation(plot.df, iv, levels)
p <- p + facet_grid(as.formula(paste(facet_lhs,'~',facet_rhs)),as.table=F)
p <- p + scale_x_discrete(labels=label_wrap_gen(14)) + ylab("Estimate") + xlab("Method") + coord_flip()
## margin titles: columns are classification counts, rows annotation counts
p <- arrangeGrob(p,
top=grid.text("No. classifications",x=0.32,just='right'),
right=grid.text("No. annotations",y=0.345,just='right',rot=270))
return(p)
}
## Core pointrange plot shared by all simulation figures: one point-range
## per method (columns est.lower.95 / est.upper.95) plus a nudged grey
## linerange for the mean CI, with a dashed reference line at the mean
## 'True' estimate for the chosen iv.
## NOTE(review): the reference line filters method=='True' while the
## default `levels` use lowercase 'true' — with the defaults the mean is
## taken over zero rows (NaN); callers appear to pass relabelled data.
.plot.simulation <- function(plot.df, iv='x', levels=c("true","naive", "amelia.full","mecor","gmm","mle", "zhang","feasible")){
plot.df <- copy(plot.df)  # avoid mutating the caller's data.table by reference
plot.df <- plot.df[,':='(method=factor(method,levels=levels,ordered=T),
N=factor(N),
m=factor(m))]
plot.df <- plot.df[,method.old:=method]
true.est <- mean(plot.df[(method=='True') & (variable==iv)]$mean.est)
plot.df <- plot.df[(variable==iv)&(method !="True")]
p <- ggplot(plot.df, aes(y=mean.est, ymax=est.upper.95, ymin=est.lower.95, x=method))
p <- p + geom_hline(aes(yintercept=true.est),linetype=2)
p <- p + geom_pointrange(shape=1,size=0.5)
p <- p + geom_linerange(aes(ymax=mean.ci.upper, ymin=mean.ci.lower),position=position_nudge(x=0.4), color='grey40')
return(p)
}
## IV-perspective wrapper around plot.simulation: drops design cells not
## shown (N=8000, m in {200, 800}) and maps method codes to display labels.
## The subset on the first line already yields a fresh data.table, so the
## `:=` relabel below does not touch the caller's data.
plot.simulation.iv <- function(plot.df, iv='x'){
plot.df <- plot.df[(N!=8000) & (m!=800) & (m!=200)]
method.labels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","gmm"="GMM", "mle"="MLE", "zhang"="PL","feasible"="Feasible")
plot.df[,method := method.labels[method]]
plot.simulation(plot.df, iv, levels=c("True","Naïve","MI", "GMM", "MLE", "PL", "Feasible"))
}
## DV-perspective wrapper around plot.simulation: copies the input, drops
## design cells not shown (N=8000, m in {200, 800}), and maps method codes
## to display labels before delegating.
plot.simulation.dv <- function(plot.df, iv='x'){
plot.df <- copy(plot.df)
plot.df <- plot.df[(N!=8000) & (m!=800) & (m!=200)]
method.labels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","gmm"="GMM", "mle"="MLE", "zhang"="PL","feasible"="Feasible")
plot.df[,method:=method.labels[method]]
plot.simulation(plot.df, iv, levels=c("True","Naïve", "MI","MLE","PL","Feasible"))
}
## Inter-rater-reliability variant: relabels the one-coder / two-coder
## method codes and delegates to plot.simulation.
plot.simulation.irr <- function(plot.df,iv='x'){
plot.df <- copy(plot.df)
method.labels <- c('true'="True","loa0.feasible"="1 coder", "loco.feasible"="2 coders", "loa0.mle"="1 coder MLE", "loco.mle"="2 coders MLE", "amelia.full"="2 coders MI", "zhang"="2 coders PL", "gmm"="2 coders GMM")
plot.df[,method:=method.labels[method]]
plot.simulation(plot.df, iv, levels=c("True","1 coder", "2 coders", "1 coder MLE","2 coders MLE","2 coders MI","2 coders PL","2 coders GMM"))
}
## DV-perspective inter-rater-reliability variant: relabels the coder-based
## method codes and delegates to plot.simulation.
plot.simulation.irr.dv <- function(plot.df,iv='x'){
plot.df <- copy(plot.df)
method.labels <- c('true'="True","naive"="Naïve","loa0.feasible"="1 coder", "loco.feasible"="Feasible", "loa0.mle"="1 coder MLE", "loco.mle"="MLE", "amelia.full"="MI", "zhang"="PL", "gmm"="GMM")
plot.df[,method:=method.labels[method]]
plot.simulation(plot.df, iv, levels=c("True","Naïve", "Feasible", "MLE","MI","PL"))
}

View File

@@ -0,0 +1,111 @@
library(scales)
library(data.table)
library(ggplot2)
## Load the CivilComments worked-example results once at source time.
iv.example <- readRDS("iv_perspective_example.RDS")
dv.example <- readRDS("dv_perspective_example.RDS")
## annotation-sample proportion and count for each perspective
iv.sample.prop <- iv.example$cc_ex_tox.likes.race_disclosed.medsampsample.prop
dv.sample.prop <- dv.example$cc_ex_tox.likes.race_disclosed.largesampsample.prop
iv.sample.count <- iv.example$cc_ex_tox.likes.race_disclosed.medsampsample.count
dv.sample.count <- dv.example$cc_ex_tox.likes.race_disclosed.largesampsample.count
## Build the CivilComments coefficient-comparison figure: coefficients and
## 95% CIs for up to four models (classifier predictions, all annotations,
## the annotation sample, and the measurement-error correction), faceted by
## regression variable.
## datalist: result list keyed by '<name><part>_model' entries.
## name: key prefix selecting one analysis from datalist.
## varnames/varorder: optional display names / ordering for coefficients.
## include.models: subset of models to show. Returns a ggplot object.
plot.cc.example <- function(datalist, name, varnames=NULL, varorder=NULL, include.models=c("Automatic Classification", "All Annotations")){
model.names <- c("Automatic Classification", "All Annotations", "Annotation Sample", "Error Correction")
## keys of the three GLM coefficient vectors and the corrected-model params
glm.par.names <- paste0(name,"coef_",c("pred", "coder", "sample"), "_model")
measerr.par.name <- paste0(name,"measerr_model_par")
glm.pars <- datalist[glm.par.names]
n.pars <- length(glm.pars[[1]])
## the measurement-error model may carry extra params; keep the first n.pars
all.pars <- append(glm.pars, list("corrected"=datalist[[measerr.par.name]][1:n.pars]))
names(all.pars) <- model.names
df.pars <- as.data.table(data.frame(all.pars),keep.rownames=TRUE)
if(!is.null(varnames)){
df.pars[, rn := varnames]
}
setnames(df.pars, old="rn", new="variable")
## same assembly for the standard-error entries
glm.stderr.names <- paste0(name,"se_",c("pred", "coder", "sample"), "_model")
glm.stderr <- datalist[glm.stderr.names]
measerr.stderr.name <- paste0(name,"measerr_model_stderr")
all.stderr <- append(glm.stderr, list("corrected"=datalist[[measerr.stderr.name]][1:n.pars]))
names(all.stderr) <- model.names
df.stderr <- as.data.table(data.frame(all.stderr), keep.rownames=TRUE)
if(!is.null(varnames)){
df.stderr[, rn := varnames]
}
setnames(df.stderr, old="rn", new="variable")
## long format: one row per (variable, model) with Estimate and StdErr
df.pars <- melt(df.pars, id.vars = "variable", variable.name = "Model", value.name = "Estimate")
df.stderr <- melt(df.stderr, id.vars = "variable",variable.name = "Model", value.name = "StdErr")
df <- df.pars[df.stderr, on = c("variable", "Model")]
## NOTE(review): sqrt(StdErr) implies the "se_" entries actually hold
## variances, not standard errors — TODO confirm against the producer code
df[,":="(UpperCI = Estimate + 1.96*sqrt(StdErr),
LowerCI = Estimate - 1.96*sqrt(StdErr))]
if(!is.null(varorder)){
df[,variable:=factor(variable,levels=varorder)]
}
df[,Model:= factor(gsub('\\.',' ', Model), levels=rev(model.names))]
df <- df[Model %in% include.models]
## NOTE(review): ymax is mapped to LowerCI and ymin to UpperCI — the
## aesthetics look swapped relative to the columns; confirm intent
p <- ggplot(df[variable != "Intercept"], aes(y = Estimate, x=Model, ymax=LowerCI, ymin=UpperCI, group=variable))
p <- p + geom_pointrange(shape=1) + facet_wrap('variable',scales='free_x',nrow=1,as.table=F) + geom_hline(aes(yintercept=0),linetype='dashed',color='gray40') + coord_flip() + xlab("")
p <- p + scale_y_continuous(breaks=breaks_extended(4))
return(p)
}
## DV-perspective CivilComments example: logistic regression predicting
## toxicity, via plot.cc.example.
## NOTE(review): this passes dv.example with the "...medsamp" key prefix,
## while the module-level dv extractions above use "...largesamp" —
## confirm the intended sample prefix.
plot.civilcomments.dv.example <- function(include.models=c("Automatic Classification", "All Annotations")){
return(plot.cc.example(dv.example, "cc_ex_tox.likes.race_disclosed.medsamp", varnames=c("Intercept", "Likes", "Identity Disclosure", "Likes:Identity Disclosure"),varorder=c("Intercept", "Likes", "Identity Disclosure", "Likes:Identity Disclosure"), include.models=include.models) + ylab("Coefficients and 95% Confidence Intervals") + ggtitle("Logistic Regression Predicting Toxicity"))
}
## IV-perspective CivilComments example: logistic regression predicting
## racial/ethnic identity disclosure, via plot.cc.example.
plot.civilcomments.iv.example <- function(include.models=c("Automatic Classification", "All Annotations")){
base.plot <- plot.cc.example(iv.example, "cc_ex_tox.likes.race_disclosed.medsamp", varnames=c("Intercept", "Likes", "Likes:Toxicity", "Toxicity"),varorder=c("Intercept", "Likes", "Toxicity", "Likes:Toxicity"), include.models=include.models)
base.plot + ylab("Coefficients and 95% Confidence Intervals") + ggtitle("Logistic Regression Predicting Racial/Ethnic Identity Disclosure")
}
## Alternative IV example plot assembled directly from the result list.
## BUG FIX: the second rownames() call re-assigned df.pars instead of
## df.stderr, leaving the standard-error matrix unlabelled; it now labels
## df.stderr as intended. Also: the attach() is now paired with an on.exit
## detach so the search path is not permanently polluted.
## NOTE(review): the final call passes (df.pars, ci.lower, ci.upper) to
## plot.cc.example, whose signature is (datalist, name, ...) — this looks
## inconsistent; confirm which plotting helper was intended.
plot.civilcomments.iv.example.2 <- function(){
attach(iv.example)
on.exit(detach('iv.example'), add=TRUE)
df.pars <- rbind(cc_ex_tox.likes.race_disclosedcoef_pred_model,
cc_ex_tox.likes.race_disclosedcoef_coder_model,
cc_ex_tox.likes.race_disclosedcoef_sample_model,
cc_ex_tox.likes.race_disclosedmeaserr_model_par[1:3]
)
rownames(df.pars) <- c('predictions', 'coders', 'sample', 'corrected')
df.stderr <- rbind(cc_ex_tox.likes.race_disclosedse_pred_model,
cc_ex_tox.likes.race_disclosedse_coder_model,
cc_ex_tox.likes.race_disclosedse_sample_model,
cc_ex_tox.likes.race_disclosedmeaserr_model_stderr[1:3]
)
rownames(df.stderr) <- c('predictions', 'coders', 'sample', 'corrected')
ci.upper <- df.pars + 1.96 * sqrt(df.stderr)
ci.lower <- df.pars - 1.96 * sqrt(df.stderr)
return(plot.cc.example(df.pars, ci.lower, ci.upper))
}

View File

@@ -0,0 +1,268 @@
library(data.table)
library(ggplot2)
source('resources/functions.R')
## Robustness check 1 (IV perspective): compare the MLE fit with no Z in
## the error model against the baseline example-2 results; draws the figure.
plot.robustness.1 <- function(iv='x'){
## robustness check 1 test g
r <- readRDS('robustness_1.RDS')
baseline_df <- readRDS('remembr.RDS')[['plot.df.example.2']]
robust_df <- data.table(r$robustness_1)
## just compare the mle methods in the two examples
## keep only runs where Z has nonzero effects on both Y and X
robust_df <- robust_df[Bzy!=0]
robust_df <- robust_df[Bzx!=0]
baseline_df[method=='true', method:='True']
robust_df[method=='true', method:='True']
baseline_df <- baseline_df[(method=='mle') | (method=='True') | (method=='naive')]
robust_df <- robust_df[(method=='mle') | (method=='True')]
baseline_df[method=='mle',method:='MLE Reported']
robust_df[method=='mle',method:='No Z in Error Model']
df <- rbind(baseline_df, robust_df, fill=TRUE)
df[method=='naive', method:='Naive']
df <- df[(N %in% c(1000,5000)) & (m %in% c(200,100))]
p <- plot.simulation(df,iv=iv,levels=c('MLE Reported','No Z in Error Model', 'Naive', 'True'))
grid.draw(p)
}
## Robustness check 1, assumption-holds variant: restricted to runs where
## Bzy == 0, i.e. the omitted Z genuinely has no effect on Y.
plot.robustness.1.checkassumption <- function(iv='x'){
## robustness check 1 test g
r <- readRDS('robustness_1.RDS')
baseline_df <- readRDS('remembr.RDS')[['plot.df.example.2']]
robust_df <- data.table(r$robustness_1)
## just compare the mle methods in the two examples
robust_df <- robust_df[Bzy==0]
robust_df <- robust_df[Bzx!=0]
baseline_df[method=='true', method:='True']
robust_df[method=='true', method:='True']
## the True reference comes from robust_df only in this variant
baseline_df <- baseline_df[(method=='mle') | (method=='naive')]
robust_df <- robust_df[(method=='mle') | (method=='True')]
baseline_df[method=='mle',method:='MLE Reported']
robust_df[method=='mle',method:='No Z in Error Model']
df <- rbind(baseline_df, robust_df, fill=TRUE)
df[method=='naive', method:='Naive']
df <- df[(N %in% c(1000,5000)) & (m %in% c(200,100))]
p <- plot.simulation(df,iv=iv,levels=c('MLE Reported','No Z in Error Model', 'Naive', 'True'))
grid.draw(p)
}
## Robustness check 1 (DV perspective): compare the DV-side MLE with no Z
## in the error model against the baseline example-4 results.
plot.robustness.1.dv <- function(iv='z'){
## robustness check 1 test g
r <- readRDS('robustness_1_dv.RDS')
baseline_df <- readRDS('remembr.RDS')[['plot.df.example.4']]
robust_df <- data.table(r$robustness_1_dv)
## just compare the mle methods in the two examples
baseline_df[method=='true', method:='True']
robust_df[method=='true', method:='True']
## keep only runs with nonzero effects of X and Z on Y
robust_df <- robust_df[Bxy!=0]
robust_df <- robust_df[Bzy!=0]
# robust_df <- robust_df[Bzx==-0.1]
baseline_df <- baseline_df[(method=='mle') | (method=='True') | (method=='naive')]
robust_df <- robust_df[(method=='mle') | (method=='True')]
baseline_df[method=='mle',method:='MLE Reported']
robust_df[method=='mle',method:='No Z in Error Model']
df <- rbind(baseline_df, robust_df, fill=TRUE)
df <- df[(N %in% c(1000,5000)) & (m %in% c(200,100))]
df[method=='naive', method:='Naive']
p <- plot.simulation(df,iv=iv,levels=c('MLE Reported','No Z in Error Model','Naive', 'True'))
grid.draw(p)
}
## Robustness check 2 (IV perspective): estimates faceted by automatic
## classifier accuracy for one (m, N) design cell; draws the figure.
plot.robustness.2.iv <- function(iv, n.annotations=100, n.classifications=5000){
r <- readRDS("robustness_2.RDS")
robust_df <- data.table(r[['robustness_2']])
robust_df <- robust_df[(m==n.annotations) & (N==n.classifications)]
## map raw method codes to display labels, then drop the feasible fit
new.levels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","gmm"="GMM", "mle"="MLE", "zhang"="PL","feasible"="Feasible")
robust_df <- robust_df[,method := new.levels[method]]
robust_df <- robust_df[method != "Feasible"]
p <- .plot.simulation(robust_df, iv=iv, levels=c("True","Naïve","MI", "GMM", "MLE", "PL", "Feasible"))
p <- p + facet_wrap(prediction_accuracy~., ncol=4,as.table=F)
p <- p + scale_x_discrete(labels=label_wrap_gen(14)) + ylab("Estimate") + xlab("Method") + coord_flip()
p <- arrangeGrob(p,
top=grid.text("AC Accuracy",x=0.32,just='right'))
grid.draw(p)
}
## Module-level summary of the robustness-2 DV results: the range of
## classifier accuracies swept in the simulation, computed once at load time.
robust2 <- readRDS("robustness_2_dv.RDS")
robust_2_df <- data.table(robust2[['robustness_2_dv']])
robust_2_min_acc <- min(robust_2_df[,prediction_accuracy])
robust_2_max_acc <- max(robust_2_df[,prediction_accuracy])
## Robustness check 2 (DV perspective): estimates faceted by automatic
## classifier accuracy for one (m, N) design cell; draws the figure.
plot.robustness.2.dv <- function(iv, n.annotations=100, n.classifications=5000){
r <- readRDS("robustness_2_dv.RDS")
robust_df <- data.table(r[['robustness_2_dv']])
#temporary work around a bug in the makefile
## if('Px' %in% names(robust_df))
## robust_df <- robust_df[is.na(Px)]
robust_df <- robust_df[(m==n.annotations) & (N==n.classifications)]
## map raw method codes to display labels, then drop the feasible fit
new.levels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","gmm"="GMM", "mle"="MLE", "zhang"="PL","feasible"="Feasible")
robust_df <- robust_df[,method := new.levels[method]]
robust_df <- robust_df[method != "Feasible"]
p <- .plot.simulation(robust_df, iv=iv, levels=c("True","Naïve","MI", "GMM", "MLE", "PL", "Feasible"))
p <- p + facet_wrap(prediction_accuracy~., ncol=4,as.table=F)
p <- p + scale_x_discrete(labels=label_wrap_gen(14)) + ylab("Estimate") + xlab("Method") + coord_flip()
p <- arrangeGrob(p,
top=grid.text("AC Accuracy",x=0.32,just='right'))
grid.draw(p)
}
## Robustness check 3 (IV perspective): compare the Fischer-information MLE
## approximation against the profile-likelihood MLE, faceted by P(X).
## BUG FIXES: (1) the profile-likelihood frame was renamed from 'MLE'
## BEFORE new.levels mapped 'mle' -> 'MLE', so the rename matched nothing;
## the mapping now runs first. (2) the approximation frame was relabelled
## 'Fischer likelihood', a label absent from the levels passed to
## .plot.simulation; it now uses 'Fischer approximation' to match.
plot.robustness.3.iv <- function(iv, n.annotations=100, n.classifications=5000){
r <- readRDS('robustness_3.RDS')
robust_df <- data.table(r[['robustness_3']])
r2 <- readRDS('robustness_3_proflik.RDS')
robust_df_proflik <- data.table(r2[['robustness_3_proflik']])
new.levels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","gmm"="GMM", "mle"="MLE", "zhang"="PL","feasible"="Feasible")
robust_df <- robust_df[(m==n.annotations) & (N==n.classifications)]
robust_df <- robust_df[,method := new.levels[method]]
robust_df <- robust_df[method != "Feasible"]
robust_df <- robust_df[method=='MLE',method:='Fischer approximation']
robust_df_proflik <- robust_df_proflik[(m==n.annotations) & (N==n.classifications)]
robust_df_proflik <- robust_df_proflik[,method := new.levels[method]]
robust_df_proflik <- robust_df_proflik[method=='MLE',method:='Profile likelihood']
robust_df_proflik <- robust_df_proflik[method != "Feasible"]
df <- rbind(robust_df, robust_df_proflik)
p <- .plot.simulation(df, iv=iv, levels=c("True","Naïve","MI", "GMM", "Profile likelihood","Fischer approximation", "PL", "Feasible"))
p <- p + facet_wrap(Px~., ncol=3,as.table=F)
p <- p + scale_x_discrete(labels=label_wrap_gen(14)) + ylab("Estimate") + xlab("Method") + coord_flip()
p <- arrangeGrob(p,
top=grid.text("P(X)",x=0.32,just='right'))
grid.draw(p)
}
## Robustness check 3 (DV perspective): estimates faceted by B0 (P(Y)).
## NOTE(review): new.levels here has no "gmm" entry, yet the levels passed
## to .plot.simulation include "GMM" — confirm whether GMM rows exist.
plot.robustness.3.dv <- function(iv, n.annotations=100, n.classifications=1000){
r <- readRDS('robustness_3_dv.RDS')
robust_df <- data.table(r[['robustness_3_dv']])
new.levels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","mle"="MLE", "zhang"="PL","feasible"="Feasible")
robust_df <- robust_df[(m==n.annotations) & (N==n.classifications)]
## map raw method codes to display labels, then drop the feasible fit
robust_df <- robust_df[,method := new.levels[method]]
robust_df <- robust_df[method != "Feasible"]
p <- .plot.simulation(robust_df, iv=iv, levels=c("True","Naïve","MI", "GMM", "MLE", "PL", "Feasible"))
p <- p + facet_wrap(B0~., ncol=3,as.table=F)
p <- p + scale_x_discrete(labels=label_wrap_gen(14)) + ylab("Estimate") + xlab("Method") + coord_flip()
p <- arrangeGrob(p,
top=grid.text("P(Y)",x=0.32,just='right'))
grid.draw(p)
}
## Robustness check 4 (IV perspective): estimates faceted by y_bias, the
## coefficient of Y on W, largest first.
## BUG FIX: `robust_df[,y_bias=factor(...)]` is not a data.table update —
## `=` makes it an unrecognized named argument and errors; `:=` is required.
plot.robustness.4.iv <- function(iv, n.annotations=100, n.classifications=1000){
r <- readRDS('robustness_4.RDS')
robust_df <- data.table(r[['robustness_4']])
new.levels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","gmm"="GMM", "mle"="MLE", "zhang"="PL","feasible"="Feasible")
robust_df <- robust_df[(m==n.annotations) & (N==n.classifications)]
robust_df <- robust_df[,method := new.levels[method]]
robust_df <- robust_df[method != "Feasible"]
## order the facets from largest to smallest bias
robust_df <- robust_df[,y_bias:=factor(y_bias,levels=sort(unique(y_bias),decreasing=TRUE))]
p <- .plot.simulation(robust_df, iv=iv, levels=c("True","Naïve","MI", "GMM", "MLE", "PL", "Feasible"))
p <- p + facet_wrap(y_bias~., ncol=3,as.table=T)
p <- p + scale_x_discrete(labels=label_wrap_gen(14)) + ylab("Estimate") + xlab("Method") + coord_flip()
p <- arrangeGrob(p,
top=grid.text("Coefficient of Y for W",x=0.32,just='right'))
grid.draw(p)
}
## NOTE(review): byte-for-byte duplicate definition of plot.robustness.4.iv;
## sourcing silently overrides the identical definition above — consider
## deleting one copy.
## BUG FIX: `y_bias=factor(...)` inside `[ ]` is not a data.table update
## and errors as an unused argument; the assignment requires `:=`.
plot.robustness.4.iv <- function(iv, n.annotations=100, n.classifications=1000){
r <- readRDS('robustness_4.RDS')
robust_df <- data.table(r[['robustness_4']])
new.levels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","gmm"="GMM", "mle"="MLE", "zhang"="PL","feasible"="Feasible")
robust_df <- robust_df[(m==n.annotations) & (N==n.classifications)]
robust_df <- robust_df[,method := new.levels[method]]
robust_df <- robust_df[method != "Feasible"]
## order the facets from largest to smallest bias
robust_df <- robust_df[,y_bias:=factor(y_bias,levels=sort(unique(y_bias),decreasing=TRUE))]
p <- .plot.simulation(robust_df, iv=iv, levels=c("True","Naïve","MI", "GMM", "MLE", "PL", "Feasible"))
p <- p + facet_wrap(y_bias~., ncol=3,as.table=T)
p <- p + scale_x_discrete(labels=label_wrap_gen(14)) + ylab("Estimate") + xlab("Method") + coord_flip()
p <- arrangeGrob(p,
top=grid.text("Coefficient of Y for W",x=0.32,just='right'))
grid.draw(p)
}
## Robustness check 4 (DV perspective): estimates faceted by z_bias, the
## coefficient of Z on W, largest first.
## BUG FIXES: (1) `z_bias=factor(...)` is not a data.table update — the
## assignment requires `:=`; (2) sort() has no `descending` argument (the
## correct name is `decreasing`), so the original call would error.
## NOTE(review): this reads key 'robustness_4' from robustness_4_dv.RDS;
## sibling DV functions read '<name>_dv' keys — confirm the key name.
plot.robustness.4.dv <- function(iv, n.annotations=100, n.classifications=1000){
r <- readRDS('robustness_4_dv.RDS')
robust_df <- data.table(r[['robustness_4']])
new.levels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","mle"="MLE", "zhang"="PL","feasible"="Feasible")
robust_df <- robust_df[(m==n.annotations) & (N==n.classifications)]
robust_df <- robust_df[,method := new.levels[method]]
robust_df <- robust_df[method != "Feasible"]
## order the facets from largest to smallest bias
robust_df <- robust_df[,z_bias:=factor(z_bias, levels=sort(unique(z_bias),decreasing=TRUE))]
p <- .plot.simulation(robust_df, iv=iv, levels=c("True","Naïve","MI", "GMM", "MLE", "PL", "Feasible"))
p <- p + facet_wrap(z_bias~., ncol=3,as.table=F)
p <- p + scale_x_discrete(labels=label_wrap_gen(14)) + ylab("Estimate") + xlab("Method") + coord_flip()
p <- arrangeGrob(p,
top=grid.text("Coefficient of Z on W",x=0.32,just='right'))
grid.draw(p)
}

55
resources/variables.R Normal file
View File

@@ -0,0 +1,55 @@
## variables.R: pull named quantities out of the simulation "remember" RDS
## files and bind them as top-level variables for the knitr document.
## NOTE(review): attach() deliberately pollutes the search path so the
## example.* / *_jobs names resolve from the RDS lists.
library(knitr)
library(ggplot2)
library(data.table)
knitr::opts_chunk$set(fig.show='hold')
## integer formatter with thousands separators, e.g. 1234 -> "1,234"
f <- function (x) {formatC(x, format="d", big.mark=',')}
## proportion -> LaTeX percentage, e.g. 0.5 -> "50\%"
format.percent <- function(x) {x<-as.numeric(x);paste(f(x*100),"\\%",sep='')}
theme_set(theme_bw())
r <- readRDS('remembr.RDS')
attach(r)
r2 <- readRDS('remember_irr.RDS')
attach(r2)
r3 <- readRDS('remember_grid_sweep.RDS')
attach(r3)
## simulation.summary.df <- data.table(sample.4
## simulation.summary.df <- kable(simulation.summary.df,format='latex',row.names=T, column.names=c("Factors", "Input Parameters")
## median summary statistics per worked example; parenthesized
## assignments also print their value when sourced
sim1a.cor.xz <- as.numeric(unlist(example.1['med.cor.xz']))
sim1a.acc <- unlist(example.1['med.accuracy'])
sim1b.acc <- unlist(example.2['med.accuracy'])
sim1b.acc.y1 <- unlist(example.2['med.accuracy.y1'])
sim1b.acc.y0 <- example.2['med.accuracy.y0']
(sim1b.fnr <- example.2['med.fnr'])
(sim1b.fnr.y0 <- example.2['med.fnr.y0'])
(sim1b.fnr.y1 <- example.2['med.fnr.y1'])
sim1b.fpr <- example.2['med.fpr']
sim1b.fpr.y0 <- example.2['med.fpr.y0']
sim1b.fpr.y1 <- example.2['med.fpr.y1']
sim1b.cor.resid.w_pred <- as.numeric(unlist(example.2['cor.resid.w_pred']))
(sim1b.cor.xz <- example.2['med.cor.xz'])
sim2a.AC.acc <- example.3['med.accuracy']
sim2a.lik.ratio <- example.3['med.lik.ratio']
sim2a.cor.xz <- as.numeric(example.3['med.cor.xz'])
sim2b.AC.acc <- example.4['med.accuracy']
sim2b.lik.ratio <- example.4['med.lik.ratio']
(sim2b.error.cor.x <- as.numeric(unlist(example.4['med.error.cor.x'])))
(sim2b.error.cor.z <- as.numeric(unlist(example.4['med.error.cor.z'])))
n.simulations <- max(unlist(example_1_jobs$seed))
## NOTE(review): this overwrites the sim1a.cor.xz computed above from
## example.1 with example.3's value — confirm which source is intended
sim1a.cor.xz <- as.numeric(unlist(example.3['med.cor.xz']))
sim1.R2 <- unlist(example_1_jobs$y_explained_variance)
N.sizes <- unlist(example_1_jobs$N)
N.sizes <- N.sizes[N.sizes!=800]
m.sizes <- unlist(example_1_jobs$m)
sim2.Bx <- as.numeric(example_4_jobs$Bxy)
sim2.Bz <- as.numeric(example_4_jobs$Bzy)
sim1.z.sd <- 0.5
irr.coder.accuracy <- unlist(example_5_jobs$coder_accuracy)
med.loco.accuracy <- unlist(example.5$med.loco.acc)