Add simulation of listwise deletion and averaging of labeled-only estimators
This commit is contained in:
parent
6b19a39464
commit
27a3849d5d
81
irr/loco_loa.R
Normal file
81
irr/loco_loa.R
Normal file
@ -0,0 +1,81 @@
|
||||
.emulate_coding <- function(ground_truth, Q = 1) {
|
||||
if (runif(1) > Q) {
|
||||
return(sample(c(0, 1), size = 1, replace = TRUE))
|
||||
} else {
|
||||
return(ground_truth)
|
||||
}
|
||||
}
|
||||
|
||||
distort_gt <- function(x, Q = NULL) {
|
||||
return(purrr::map_dbl(x, .emulate_coding, Q = Q))
|
||||
}
|
||||
|
||||
N <- c(1000, 3600, 14400)
|
||||
m <- c(75, 150, 300)
|
||||
|
||||
B0 <- c(0, 0.1, 0.3)
|
||||
Bxy <- c(0.1, 0.2, 0.5)
|
||||
|
||||
Q <- c(.6, .8, .9)
|
||||
|
||||
conditions <- expand.grid(N, m, B0, Bxy, Q)
|
||||
|
||||
colnames(conditions) <- c("N", "m", "B0", "Bxy", "Q")
|
||||
|
||||
logistic <- function(x) {1/(1+exp(-1*x))}
|
||||
|
||||
.step <- function(i, Bxy, B0, Q, N, m) {
|
||||
x <- rbinom(N, 1, 0.5)
|
||||
y <- Bxy * x + rnorm(N, 0, .5) + B0
|
||||
|
||||
dx <- as.numeric(distort_gt(x, Q = Q))
|
||||
|
||||
randomidx <- sample(seq(N), m)
|
||||
|
||||
coder1x <- distort_gt(x[randomidx], Q = Q)
|
||||
coder2x <- distort_gt(x[randomidx], Q = Q)
|
||||
coding_data <- matrix(c(as.numeric(coder1x), as.numeric(coder2x)), nrow = 2, byrow = TRUE)
|
||||
alpha <- irr::kripp.alpha(coding_data, method = "nominal")
|
||||
estimated_q <- alpha$value^(1/2)
|
||||
estimated_q2 <- alpha$value
|
||||
|
||||
res <- data.frame(x = as.factor(x), y = y, dx = as.factor(dx))
|
||||
small_y <- y[randomidx]
|
||||
small_x <- x[randomidx]
|
||||
naive_mod <- glm(y~dx, data = res, x = TRUE, y = TRUE)
|
||||
real_mod <- glm(y~x, data = res, x = TRUE, y = TRUE)
|
||||
m1 <- glm(small_y~coder1x)
|
||||
m2 <- glm(small_y~coder2x)
|
||||
m3 <- glm(small_y~small_x)
|
||||
correct_only_idx <- coder1x == coder2x
|
||||
m4 <- glm(small_y[correct_only_idx] ~ small_x[correct_only_idx])
|
||||
lab_only_gt <- coef(m3)[2]
|
||||
lab_only_avg <- mean(coef(m1)[2], coef(m2)[2])
|
||||
lab_only_correct_only <- coef(m4)[2]
|
||||
return(tibble::tibble(N, m, Q, Bxy, B0, estimated_q, naive_Bxy = as.numeric(coef(naive_mod)[2]), real_Bxy = as.numeric(coef(real_mod)[2]), lab_only_gt= lab_only_gt, lab_only_avg = lab_only_avg, lab_only_correct_only = lab_only_correct_only))
|
||||
}
|
||||
|
||||
## res <- list()
|
||||
|
||||
## for (i in seq(nrow(conditions))) {
|
||||
## message(i)
|
||||
## res[[i]] <- purrr::map_dfr(1:100, ~.step(., conditions$Bxy[i], conditions$B0[i], conditions$Q[i], conditions$N[i], conditions$m[i]))
|
||||
## }
|
||||
|
||||
require(furrr)
|
||||
plan(multisession)
|
||||
|
||||
.run <- function(i, conditions) {
|
||||
purrr::map_dfr(1:100, ~.step(., conditions$Bxy[i], conditions$B0[i], conditions$Q[i], conditions$N[i], conditions$m[i]))
|
||||
}
|
||||
|
||||
res <- future_map(seq(nrow(conditions)), .run, conditions = conditions, .progress = TRUE)
|
||||
|
||||
##saveRDS(res, "rubin_res.RDS")
|
||||
|
||||
conditions <- tibble::as_tibble(conditions)
|
||||
conditions$res <- res
|
||||
|
||||
require(tidyverse)
|
||||
|
||||
conditions %>% mutate(loco_median = purrr::map_dbl(res, ~median(.$lab_only_correct_only)), loco_p025 = purrr::map_dbl(res, ~quantile(.$lab_only_correct_only, probs = 0.025)), loco_p975 = purrr::map_dbl(res, ~quantile(.$lab_only_correct_only, probs = 0.975))) %>% mutate(loa_median = purrr::map_dbl(res, ~median(.$lab_only_avg)), loa_p025 = purrr::map_dbl(res, ~quantile(.$lab_only_avg, probs = 0.025)), loa_p975 = purrr::map_dbl(res, ~quantile(.$lab_only_avg, probs = 0.975))) %>% filter(B0 == 0.1 & Bxy == 0.5) %>% select(N, m, Q, starts_with("loco"), starts_with("loa")) %>% pivot_longer(cols = loco_median:loa_p975, names_to = c("type", "tile"),names_pattern = "(.*)_(.*)", values_to = "value") %>% pivot_wider(names_from = "tile") %>% ggplot(aes(x = Q, y = median, ymin = p025, ymax = p975, fill = type, col = type)) + geom_line() + geom_ribbon(alpha = 0.2) + facet_grid(N~m) + geom_hline(yintercept = .5, linetype = 2, col = "grey")
|
Loading…
Reference in New Issue
Block a user