ml_measurement_error_overleaf/resources/functions.R

library(grid)
library(gridExtra)
library(gtable)
format.percent <- function(x,digits=1){paste(round(x*100,digits),"\\%",sep='')}

f <- function (x) {formatC(x, format="d", big.mark=',')}


plot.simulation <- function(plot.df, iv='x', levels=c("true","naive", "amelia.full","mecor","gmm","mle", "zhang","feasible"),facet_lhs='m',facet_rhs='N'){

    p <- .plot.simulation(plot.df, iv, levels)

    p <- p +  facet_grid(as.formula(paste(facet_lhs,'~',facet_rhs)),as.table=F)

    p <- p + scale_x_discrete(labels=label_wrap_gen(14)) + ylab(paste0("Estimate of ", toupper(iv))) + xlab("Method") + coord_flip()

    p <- arrangeGrob(p,
                     top=grid.text("No. classifications",x=0.32,just='right'),
                     right=grid.text("No. annotations",y=0.345,just='right',rot=270))

    return(p)
}

.plot.simulation <- function(plot.df, iv='x', levels=c("true","naive", "amelia.full","mecor","gmm","mle", "zhang","feasible")){

    plot.df <- copy(plot.df)

    plot.df <- plot.df[,':='(method=factor(method,levels=levels,ordered=T),
                             N=factor(N),
                             m=factor(m))]

    plot.df <- plot.df[,method.old:=method]

    true.est <- mean(plot.df[(method=='True') & (variable==iv)]$mean.est)

    plot.df <- plot.df[(variable==iv)&(method !="True")]

    p <- ggplot(plot.df, aes(y=mean.est, ymax=est.upper.95, ymin=est.lower.95, x=method))
    p <- p + geom_hline(aes(yintercept=true.est),linetype=2)

    p <- p + geom_pointrange(shape=1,size=0.5)
    p <- p + geom_linerange(aes(ymax=median.ci.upper, ymin=median.ci.lower),position=position_nudge(x=0.4), color='grey40')

    return(p)
}


plot.simulation.iv <- function(plot.df, iv='x'){

    plot.df <- plot.df[(N!=8000) & (m!=800) & (m!=200)]
    new.levels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","gmm"="GMM", "mle"="MLA", "zhang"="PL","feasible"="Feasible")
    plot.df[,method := new.levels[method]]

    return(plot.simulation(plot.df, iv, levels=c("True","Naïve","MI", "GMM", "MLA", "PL", "Feasible")))
}


plot.simulation.dv <- function(plot.df, iv='x'){
    plot.df <- copy(plot.df)
    plot.df <- plot.df[(N!=8000) & (m!=800) & (m!=200)]

    new.levels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","gmm"="GMM", "mle"="MLA", "zhang"="PL","feasible"="Feasible")

    plot.df[,method:=new.levels[method]]
    return(plot.simulation(plot.df, iv, levels=c("True","Naïve", "MI","MLA","PL","Feasible")))
}

plot.simulation.irr <- function(plot.df,iv='x'){
    plot.df <- copy(plot.df)
    new.levels <- c('true'="True","loa0.feasible"="1 coder", "loco.feasible"="2 coders", "loa0.mle"="1 coder MLE", "loco.mle"="2 coders MLE", "amelia.full"="2 coders MI", "zhang"="2 coders PL", "gmm"="2 coders GMM")
    plot.df <- plot.df[,method:=new.levels[method]]
    return(plot.simulation(plot.df, iv, levels=c("True","1 coder", "2 coders", "1 coder MLE","2 coders MLE","2 coders MI","2 coders PL","2 coders GMM")))

}

plot.simulation.irr.dv <- function(plot.df,iv='x'){
    plot.df <- copy(plot.df)
    new.levels <- c('true'="True","naive"="Naïve","loa0.feasible"="1 coder", "loco.feasible"="Feasible", "loa0.mle"="1 coder MLE", "loco.mle"="MLE", "amelia.full"="MI", "zhang"="PL", "gmm"="GMM")

    plot.df <- plot.df[,method:=new.levels[method]]
    return(plot.simulation(plot.df, iv, levels=c("True","Naïve", "Feasible", "MLE","MI","PL")))
}