## Source: ml_measurement_error_overleaf/resources/robustness_check_plots.R
## Last modified: 2023-03-03 18:43:14 +00:00 (251 lines, 10 KiB, R)
library(data.table)
library(ggplot2)
source('resources/functions.R')
plot.robustness.1 <- function(iv='x'){
  ## Robustness check 1 (IV case): compare the MLA fit with the correctly
  ## specified error model (example 2 baseline) against the MLA fit without
  ## Z in the error model, alongside the Naive and True references.
  robust_results <- readRDS('robustness_1.RDS')
  baseline_df <- readRDS('remembr.RDS')[['plot.df.example.2']]
  robust_df <- data.table(robust_results$robustness_1)

  ## keep only runs where both confounder paths are active
  robust_df <- robust_df[(Bzy != 0) & (Bzx != 0)]

  ## harmonize labels, then keep just the methods being compared
  baseline_df[method == 'true', method := 'True']
  robust_df[method == 'true', method := 'True']
  baseline_df <- baseline_df[method %in% c('mle', 'True', 'naive')]
  robust_df <- robust_df[method %in% c('mle', 'True')]

  ## final display names (baseline mle = correct model, robust mle = misspecified)
  baseline_df[method == 'mle', method := 'Correct MLA']
  robust_df[method == 'mle', method := 'Misspec. MLA']

  df <- rbind(baseline_df, robust_df, fill=TRUE)
  df[method == 'naive', method := 'Naive']
  df <- df[(N %in% c(1000, 5000)) & (m %in% c(200, 100))]

  p <- plot.simulation(df, iv=iv,
                       levels=c('Correct MLA', 'Misspec. MLA', 'Naive', 'True'))
  grid.draw(p)
}
plot.robustness.1.checkassumption <- function(iv='x'){
## Robustness check 1, assumption check: same comparison as plot.robustness.1
## but restricted to runs where Bzy == 0 (Z has no direct effect on Y),
## i.e. the setting where omitting Z from the error model should be harmless.
r <- readRDS('robustness_1.RDS')
baseline_df <- readRDS('remembr.RDS')[['plot.df.example.2']]
robust_df <- data.table(r$robustness_1)
## just compare the mle methods in the two examples
robust_df <- robust_df[Bzy==0]
robust_df <- robust_df[Bzx!=0]
baseline_df[method=='true', method:='True']
robust_df[method=='true', method:='True']
## NOTE(review): unlike plot.robustness.1, the baseline here keeps only
## mle + naive ('True' rows come from robust_df alone) — confirm this
## asymmetry is intentional.
baseline_df <- baseline_df[(method=='mle') | (method=='naive')]
robust_df <- robust_df[(method=='mle') | (method=='True')]
baseline_df[method=='mle',method:='MLA Reported']
robust_df[method=='mle',method:='No Z in Error Model']
df <- rbind(baseline_df, robust_df, fill=TRUE)
df[method=='naive', method:='Naive']
## restrict to the sample sizes shown in the paper
df <- df[(N %in% c(1000,5000)) & (m %in% c(200,100))]
p <- plot.simulation(df,iv=iv,levels=c('MLA Reported','No Z in Error Model', 'Naive', 'True'))
grid.draw(p)
}
plot.robustness.1.dv <- function(iv='z'){
  ## Robustness check 1 (DV case): compare the correctly specified MLA
  ## (example 4 baseline) against the MLA fit without Z in the error model.
  robust_results <- readRDS('robustness_1_dv.RDS')
  baseline_df <- readRDS('remembr.RDS')[['plot.df.example.4']]
  robust_df <- data.table(robust_results$robustness_1_dv)

  ## harmonize the 'true' label in both data sets
  baseline_df[method == 'true', method := 'True']
  robust_df[method == 'true', method := 'True']

  ## keep runs where both effects of interest are active
  robust_df <- robust_df[(Bxy != 0) & (Bzy != 0)]
  # robust_df <- robust_df[Bzx==-0.1]  # retained from original exploration

  baseline_df <- baseline_df[method %in% c('mle', 'True', 'naive')]
  robust_df <- robust_df[method %in% c('mle', 'True')]

  ## final display names (baseline mle = correct model, robust mle = misspecified)
  baseline_df[method == 'mle', method := 'Correct MLA']
  robust_df[method == 'mle', method := 'Misspec. MLA']

  df <- rbind(baseline_df, robust_df, fill=TRUE)
  df <- df[(N %in% c(1000, 5000)) & (m %in% c(200, 100))]
  df[method == 'naive', method := 'Naive']

  p <- plot.simulation(df, iv=iv,
                       levels=c('Correct MLA', 'Misspec. MLA', 'Naive', 'True'))
  grid.draw(p)
}
plot.robustness.2.iv <- function(iv, n.annotations=100, n.classifications=5000){
  ## Robustness check 2 (IV case): estimator performance across levels of
  ## automated-classifier (AC) accuracy, faceted by prediction_accuracy.
  results <- readRDS("robustness_2.RDS")
  plot_df <- data.table(results[['robustness_2']])
  plot_df <- plot_df[(m == n.annotations) & (N == n.classifications)]

  ## map internal method codes to display labels
  label_map <- c("true"="True", "naive"="Naïve", "amelia.full"="MI",
                 "mecor"="mecor", "gmm"="GMM", "mle"="MLA",
                 "zhang"="PL", "feasible"="Feasible")
  plot_df[, method := label_map[method]]
  plot_df <- plot_df[method != "Feasible"]

  p <- .plot.simulation(plot_df, iv=iv,
                        levels=c("True","Naïve","MI", "GMM", "MLA", "PL", "Feasible"))
  p <- p + facet_wrap(prediction_accuracy ~ ., ncol=4, as.table=F)
  p <- p + scale_x_discrete(labels=label_wrap_gen(14)) +
    ylab("Estimate") + xlab("Method") + coord_flip()
  ## add the overall title as a grob so it can be drawn with grid.draw
  p <- arrangeGrob(p,
                   top=grid.text("Varying Accuracy of the AC",x=0.42,just='right'))
  grid.draw(p)
}
## Module-level globals: accuracy range observed in the robustness-2 DV run
## (names kept — they may be referenced outside this file).
robust2 <- readRDS("robustness_2_dv.RDS")
robust_2_df <- data.table(robust2[["robustness_2_dv"]])
robust_2_min_acc <- min(robust_2_df$prediction_accuracy)
robust_2_max_acc <- max(robust_2_df$prediction_accuracy)
plot.robustness.2.dv <- function(iv, n.annotations=100, n.classifications=5000){
  ## Robustness check 2 (DV case): estimator performance across levels of
  ## AC accuracy, faceted by prediction_accuracy.
  results <- readRDS("robustness_2_dv.RDS")
  plot_df <- data.table(results[['robustness_2_dv']])
  ## (A Makefile-bug workaround that filtered on Px was previously here;
  ## it is kept disabled.)
  ## if('Px' %in% names(plot_df))
  ##   plot_df <- plot_df[is.na(Px)]
  plot_df <- plot_df[(m == n.annotations) & (N == n.classifications)]

  ## map internal method codes to display labels
  label_map <- c("true"="True", "naive"="Naïve", "amelia.full"="MI",
                 "mecor"="mecor", "gmm"="GMM", "mle"="MLA",
                 "zhang"="PL", "feasible"="Feasible")
  plot_df[, method := label_map[method]]
  plot_df <- plot_df[method != "Feasible"]

  p <- .plot.simulation(plot_df, iv=iv,
                        levels=c("True","Naïve","MI", "GMM", "MLA", "PL", "Feasible"))
  p <- p + facet_wrap(prediction_accuracy ~ ., ncol=4, as.table=F)
  p <- p + scale_x_discrete(labels=label_wrap_gen(14)) +
    ylab("Estimate") + xlab("Method") + coord_flip()
  ## add the overall title as a grob so it can be drawn with grid.draw
  p <- arrangeGrob(p,
                   top=grid.text("Varying Accuracy of the AC",x=0.42,just='right'))
  grid.draw(p)
}
plot.robustness.3.iv <- function(iv, n.annotations=200, n.classifications=5000){
## Robustness check 3 (IV case): estimator performance under imbalance in X,
## faceted by Px, at a fixed Bzx and sample sizes.
r <- readRDS('robustness_3.RDS')
robust_df <- data.table(r[['robustness_3']])
## NOTE(review): the profile-likelihood results below are loaded but every
## use of robust_df_proflik is commented out, so this read is currently dead
## code — confirm whether the Fisher-vs-profile comparison should be restored.
r2 <- readRDS('robustness_3_proflik.RDS')
robust_df_proflik <- data.table(r2[['robustness_3_proflik']])
## map internal method codes to display labels
new.levels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","gmm"="GMM", "mle"="MLA", "zhang"="PL","feasible"="Feasible")
robust_df <- robust_df[(m==n.annotations) & (N==n.classifications)]
robust_df <- robust_df[,method := new.levels[method]]
robust_df <- robust_df[(method != "Feasible") & (Bzx==0.3)]
## exclude the profile-likelihood estimator from this plot
robust_df <- robust_df[(method != "PL")]
## robust_df <- robust_df[method=='MLA',method:='Fischer likelihood']
## robust_df_proflik <- robust_df_proflik[(m==n.annotations) & (N==n.classifications)]
## robust_df_proflik <- robust_df_proflik[,method := new.levels[method]]
## robust_df_proflik <- robust_df_proflik[method=='MLA']
## robust_df_proflik <- robust_df_proflik[method=='MLA',method:='Profile likelihood']
## robust_df_proflik <- robust_df_proflik[method != "Feasible"]
## df <- rbind(robust_df, robust_df_proflik)
p <- .plot.simulation(robust_df, iv=iv, levels=c("True","Naïve","MI", "GMM","MLA", "PL", "Feasible"))
p <- p + facet_wrap(Px~., ncol=3,as.table=F)
p <- p + scale_x_discrete(labels=label_wrap_gen(14)) + ylab("Estimate") + xlab("Method") + coord_flip()
## add the overall title as a grob so it can be drawn with grid.draw
p <- arrangeGrob(p,
top=grid.text("Imbalance in X",x=0.32,just='right'))
grid.draw(p)
}
plot.robustness.3.dv <- function(iv, n.annotations=100, n.classifications=1000){
## Robustness check 3 (DV case): estimator performance under imbalance in Y,
## faceted by Py = plogis(B0).
## FIX: the PL exclusion used to run AFTER the plot object was built, so it
## had no effect; filter before calling .plot.simulation, matching
## plot.robustness.3.iv.
r <- readRDS('robustness_3_dv.RDS')
robust_df <- data.table(r[['robustness_3_dv']])
## map internal method codes to display labels
## NOTE(review): no entry for "gmm" here even though "GMM" appears in the
## plot levels — any gmm rows would map to NA; confirm the DV run omits gmm.
new.levels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","mle"="MLA", "zhang"="PL","feasible"="Feasible")
robust_df <- robust_df[(m==n.annotations) & (N==n.classifications)]
robust_df <- robust_df[,method := new.levels[method]]
robust_df <- robust_df[method != "Feasible"]
## derive the baseline probability of Y for the facet variable
robust_df <- robust_df[,Py := round(plogis(B0),2)]
## exclude the profile-likelihood estimator before plotting
robust_df <- robust_df[(method != "PL")]
p <- .plot.simulation(robust_df, iv=iv, levels=c("True","Naïve","MI", "GMM", "MLA", "PL", "Feasible"))
p <- p + facet_wrap(Py~., ncol=3,as.table=F,scales='free')
p <- p + scale_x_discrete(labels=label_wrap_gen(14)) + ylab("Estimate") + xlab("Method") + coord_flip()
## add the overall title as a grob so it can be drawn with grid.draw
p <- arrangeGrob(p,
top=grid.text("Imbalance in Y",x=0.32,just='right'))
grid.draw(p)
}
plot.robustness.4.iv <- function(iv, n.annotations=100, n.classifications=1000){
## Robustness check 4 (IV case): estimator performance under varying degrees
## of misclassification in X, faceted by y_bias, at Bzx == 1.
r <- readRDS('robustness_4.RDS')
robust_df <- data.table(r[['robustness_4']])
## map internal method codes to display labels
new.levels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","gmm"="GMM", "mle"="MLA", "zhang"="PL","feasible"="Feasible")
robust_df <- robust_df[(m==n.annotations) & (N==n.classifications)]
robust_df <- robust_df[,method := new.levels[method]]
robust_df <- robust_df[method != "Feasible"]
## order facets from most to least bias; reference the column directly inside
## `:=` (consistent with plot.robustness.4.dv) instead of via robust_df$y_bias
robust_df <- robust_df[,y_bias := factor(y_bias, levels=sort(unique(y_bias), decreasing=TRUE))]
robust_df <- robust_df[Bzx==1]
p <- .plot.simulation(robust_df, iv=iv, levels=c("True","Naïve","MI", "GMM", "MLA", "PL", "Feasible"))
p <- p + facet_wrap(y_bias~., ncol=3,as.table=T)
p <- p + scale_x_discrete(labels=label_wrap_gen(14)) + ylab("Estimate") + xlab("Method") + coord_flip()
## add the overall title as a grob so it can be drawn with grid.draw
p <- arrangeGrob(p,
top=grid.text("Varying Degree of Misclassification in X",x=0.52,just='right'))
grid.draw(p)
}
plot.robustness.4.dv <- function(iv, n.annotations=100, n.classifications=1000){
## Robustness check 4 (DV case): estimator performance under varying degrees
## of misclassification in Y, faceted by z_bias, at Bzx == 1.
r <- readRDS('robustness_4_dv.RDS')
## FIX: was r[['robustness_4']]; every other file/key pair in this script
## matches (robustness_2_dv -> 'robustness_2_dv', robustness_3_dv ->
## 'robustness_3_dv'), and the old key would have returned NULL.
## TODO confirm against the code that writes robustness_4_dv.RDS.
robust_df <- data.table(r[['robustness_4_dv']])
## map internal method codes to display labels
## NOTE(review): no entry for "gmm" even though "GMM" appears in the plot
## levels — any gmm rows would map to NA; confirm the DV run omits gmm.
new.levels <- c("true"="True","naive"="Naïve","amelia.full"="MI", "mecor"="mecor","mle"="MLA", "zhang"="PL","feasible"="Feasible")
robust_df <- robust_df[(m==n.annotations) & (N==n.classifications)]
robust_df <- robust_df[,method := new.levels[method]]
robust_df <- robust_df[method != "Feasible"]
## order facets from most to least bias
robust_df <- robust_df[,z_bias:=factor(z_bias, levels=sort(unique(z_bias),decreasing=TRUE))]
robust_df <- robust_df[Bzx==1]
p <- .plot.simulation(robust_df, iv=iv, levels=c("True","Naïve","MI", "GMM", "MLA", "PL", "Feasible"))
p <- p + facet_wrap(z_bias~., ncol=3,as.table=F)
p <- p + scale_x_discrete(labels=label_wrap_gen(14)) + ylab("Estimate") + xlab("Method") + coord_flip()
## add the overall title as a grob so it can be drawn with grid.draw
p <- arrangeGrob(p,
top=grid.text("Varying Degree of Misclassification in Y",x=0.52,just='right'))
grid.draw(p)
}