1
0

18 Commits

Author SHA1 Message Date
Nathan TeBlunthuis
fbf0de0641 update submodules. 2025-06-04 13:43:20 -07:00
Nathan TeBlunthuis
d900230ebc add changes from local. 2025-06-04 13:40:53 -07:00
214551f74c changes from klone 2023-09-08 09:01:31 -07:00
bb6f5e4731 Merge branch 'master' of code:ml_measurement_error_public 2023-08-12 13:10:19 -07:00
d9d3e47a44 real-data example on raw perspective scores 2023-08-12 13:09:31 -07:00
6b410335f8 Unstash work on misclassification models 2023-03-03 09:19:17 -08:00
ce6d12d4e6 pass through optimization parameters 2023-03-01 10:39:35 -08:00
c1dbbfd0dd update simulation base from hyak 2023-02-28 16:28:35 -08:00
69948cae1e update plotting code 2023-02-28 16:14:34 -08:00
acb119418a update simulations code 2023-02-28 16:13:36 -08:00
b8d2048cc5 Make summarize estimator group correctly for robustness checks.
Also fix a possible bug in the MI logic and simplify the error
correction formula in example 2.
2023-02-11 12:26:48 -08:00
c45ea9dfeb update real data examples code and rerun project. 2023-01-11 10:59:50 -08:00
c066f900d3 bugfix in example. 2023-01-06 12:27:04 -08:00
fa05dbab6b check in some old simulation updates and a dv examples with real data 2023-01-06 12:22:41 -08:00
d8bc08f18f Added, but didn't test the remaining robustness checks. 2022-12-11 22:46:30 -08:00
8ac33c14d7 Add another robustness check for varying levels of accuracy. 2022-12-11 14:42:06 -08:00
82fe7b0f48 cleaning up + implementing robustness checks
+ add pl_methods.R
+ update makefile
+ fix bug in 02_indep_differential.R
+ start documenting robustness checks in robustness_check_notes.md
2022-12-11 12:54:34 -08:00
3d1964b806 Add exploratory data analysis to come up with a real-data example. 2022-11-29 00:29:42 -08:00
60 changed files with 1517 additions and 3197 deletions

9
.gitmodules vendored
View File

@@ -1,3 +1,12 @@
[submodule "paper"]
	path = paper
	url = git@github.com:chainsawriot/measure.git
[submodule "overleaf"]
path = overleaf
url = https://git.overleaf.com/62a956eb9b9254783cc84c82
[submodule "misclassificationmodels"]
path = misclassificationmodels
url = https://github.com/chainsawriot/misclassificationmodels.git
[submodule "presentation"]
path = presentation
url = https://git.overleaf.com/646be7922a7fb19bcb461593

3
Makefile Normal file
View File

@@ -0,0 +1,3 @@
all:
+$(MAKE) -C simulations
+$(MAKE) -C civil_comments

View File

@@ -0,0 +1,89 @@
source('load_perspective_data.R')
source("../simulations/measerr_methods.R")
source("../simulations/RemembR/R/RemembeR.R")
change.remember.file("dv_perspective_example.RDS")
remember(accuracies, "civil_comments_accuracies")
remember(f1s, "civil_comments_f1s")
remember(positive_cases, "civil_comments_positive_cases")
remember(proportions_cases, "civil_comments_proportions_cases")
remember(cortab, "civil_comments_cortab")
remember(nrow(df), 'n.annotated.comments')
# for reproducibility
set.seed(111)
## another simple enough example: is P(toxic | funny and white) > P(toxic | funny nand white)? Or, are funny comments more toxic when people disclose that they are white?
compare_dv_models <-function(pred_formula, outcome_formula, proxy_formula, df, sample.prop, sample.size, remember_prefix){
if(is.null(sample.prop)){
sample.prop <- sample.size / nrow(df)
}
if(is.null(sample.size)){
sample.size <- nrow(df) * sample.prop
}
pred_model <- glm(pred_formula, df, family=binomial(link='logit'))
remember(sample.size, paste0(remember_prefix, "sample.size"))
remember(sample.prop, paste0(remember_prefix, "sample.prop"))
remember(pred_formula, paste0(remember_prefix, "pred_formula"))
remember(outcome_formula, paste0(remember_prefix, 'outcome_formula'))
remember(proxy_formula, paste0(remember_prefix, 'proxy_formula'))
remember(coef(pred_model), paste0(remember_prefix, "coef_pred_model"))
remember(diag(vcov((pred_model))), paste0(remember_prefix, "se_pred_model"))
coder_model <- glm(outcome_formula, df, family=binomial(link='logit'))
remember(coef(coder_model), paste0(remember_prefix, "coef_coder_model"))
remember(diag(vcov((coder_model))), paste0(remember_prefix, "se_coder_model"))
df_measerr_method <- copy(df)[sample(1:.N, sample.size), toxicity_coded_1 := toxicity_coded]
df_measerr_method <- df_measerr_method[,toxicity_coded := toxicity_coded_1]
sample_model <- glm(outcome_formula, df_measerr_method, family=binomial(link='logit'))
remember(coef(sample_model), paste0(remember_prefix, "coef_sample_model"))
remember(diag(vcov((sample_model))), paste0(remember_prefix, "se_sample_model"))
measerr_model <- measerr_mle_dv(df_measerr_method, outcome_formula, outcome_family=binomial(link='logit'), proxy_formula=proxy_formula, proxy_family=binomial(link='logit'))
inv_hessian = solve(measerr_model$hessian)
stderr = diag(inv_hessian)
remember(stderr, paste0(remember_prefix, "measerr_model_stderr"))
remember(measerr_model$par, paste0(remember_prefix, "measerr_model_par"))
}
print("running first example")
pred_formula = toxicity_pred ~ likes + race_disclosed
outcome_formula = toxicity_coded ~ likes + race_disclosed
proxy_formula = toxicity_pred ~ toxicity_coded*race_disclosed*likes
compare_dv_models(pred_formula = pred_formula,
outcome_formula = outcome_formula,
proxy_formula = proxy_formula,
df=df,
sample.prop=0.01,
sample.size=NULL,
remember_prefix='cc_ex_tox.likes.race_disclosed')
print("running second example")
compare_dv_models(pred_formula = pred_formula,
outcome_formula = outcome_formula,
proxy_formula = proxy_formula,
df=df,
sample.size=10000,
sample.prop=NULL,
remember_prefix='cc_ex_tox.likes.race_disclosed.medsamp')
print("running third example")
compare_dv_models(pred_formula = pred_formula,
outcome_formula = outcome_formula,
proxy_formula = proxy_formula,
df=df,
sample.prop=0.05,
sample.size=NULL,
remember_prefix='cc_ex_tox.likes.race_disclosed.largesamp')

View File

@@ -0,0 +1,107 @@
source("../simulations/RemembR/R/RemembeR.R")
change.remember.file("iv_perspective_example.RDS")
source('load_perspective_data.R')
source("../simulations/measerr_methods.R")
remember(accuracies, "civil_comments_accuracies")
remember(f1s, "civil_comments_f1s")
remember(positive_cases, "civil_comments_positive_cases")
remember(proportions_cases, "civil_comments_proportions_cases")
remember(cortab, "civil_comments_cortab")
remember(nrow(df), 'n.annotated.comments')
# for reproducibility
set.seed(1)
## another simple enough example: is P(toxic | funny and white) > P(toxic | funny nand white)? Or, are funny comments more toxic when people disclose that they are white?
compare_iv_models <-function(pred_formula, outcome_formula, proxy_formula, truth_formula, df, sample.prop, sample.size, remember_prefix){
if(is.null(sample.prop)){
sample.prop <- sample.size / nrow(df)
}
if(is.null(sample.size)){
sample.size <- nrow(df) * sample.prop
}
remember(sample.size, paste0(remember_prefix, "sample.size"))
remember(sample.prop, paste0(remember_prefix, "sample.prop"))
remember(pred_formula, paste0(remember_prefix, "pred_formula"))
remember(outcome_formula, paste0(remember_prefix, 'outcome_formula'))
remember(proxy_formula, paste0(remember_prefix, 'proxy_formula'))
remember(truth_formula, paste0(remember_prefix, 'truth_formula'))
pred_model <- glm(pred_formula, df, family=binomial(link='logit'))
remember(coef(pred_model), paste0(remember_prefix, "coef_pred_model"))
remember(diag(vcov((pred_model))), paste0(remember_prefix, "se_pred_model"))
coder_model <- glm(outcome_formula, df, family=binomial(link='logit'))
remember(coef(coder_model), paste0(remember_prefix, "coef_coder_model"))
remember(diag(vcov((coder_model))), paste0(remember_prefix, "se_coder_model"))
df_measerr_method <- copy(df)[sample(1:.N, sample.size), toxicity_coded_1 := toxicity_coded]
df_measerr_method <- df_measerr_method[,toxicity_coded := toxicity_coded_1]
sample_model <- glm(outcome_formula, df_measerr_method, family=binomial(link='logit'))
remember(coef(sample_model), paste0(remember_prefix, "coef_sample_model"))
remember(diag(vcov((sample_model))), paste0(remember_prefix, "se_sample_model"))
measerr_model <- measerr_mle(df_measerr_method, outcome_formula, outcome_family=binomial(link='logit'), proxy_formula=proxy_formula, proxy_family=binomial(link='logit'),truth_formula=truth_formula, truth_family=binomial(link='logit'))
inv_hessian = solve(measerr_model$hessian)
stderr = diag(inv_hessian)
remember(stderr, paste0(remember_prefix, "measerr_model_stderr"))
remember(measerr_model$par, paste0(remember_prefix, "measerr_model_par"))
}
## print("running first iv example")
## sample.prop <- 0.05
## compare_iv_models(white ~ toxicity_pred*funny,
## outcome_formula = white ~ toxicity_coded*funny,
## proxy_formula = toxicity_pred ~ toxicity_coded*funny*white,
## truth_formula = toxicity_coded ~ 1,
## df=df,
## sample.prop=sample.prop,
## remember_prefix='cc_ex_tox.funny.white')
pred_formula <- race_disclosed ~ likes * toxicity_pred
outcome_formula <- race_disclosed ~ likes * toxicity_coded
proxy_formula <- toxicity_pred ~ toxicity_coded * race_disclosed * likes
truth_formula <- toxicity_coded ~ 1
print("running first example")
compare_iv_models(pred_formula = pred_formula,
outcome_formula = outcome_formula,
proxy_formula = proxy_formula,
truth_formula = truth_formula,
df=df,
sample.prop=0.01,
sample.size=NULL,
remember_prefix='cc_ex_tox.likes.race_disclosed')
print("running second example")
compare_iv_models(pred_formula = pred_formula,
outcome_formula = outcome_formula,
proxy_formula = proxy_formula,
truth_formula = truth_formula,
df=df,
sample.prop=NULL,
sample.size=10000,
remember_prefix='cc_ex_tox.likes.race_disclosed.medsamp')
print("running third example")
compare_iv_models(pred_formula = race_disclosed ~ likes * toxicity_pred,
outcome_formula = race_disclosed ~ likes * toxicity_coded,
proxy_formula = toxicity_pred ~ toxicity_coded + race_disclosed,
truth_formula = toxicity_coded ~ 1,
df=df,
sample.prop=0.05,
sample.size=NULL,
remember_prefix='cc_ex_tox.likes.race_disclosed.largesamp')

View File

@@ -0,0 +1,21 @@
source('load_perspective_data.R')
source("../simulations/RemembR/R/RemembeR.R")
library(xtable)
change.remember.file("prob_not_pred.RDS")
### to respond to the reviewer show what happens if we don't recode the predictions.
non_recoded_dv <- lm(toxicity_prob ~ likes * race_disclosed, data=df)
remember(coef(non_recoded_dv), "coef_dv")
remember(diag(vcov(non_recoded_dv)), "se_dv")
remember(xtable(non_recoded_dv),'dv_xtable')
non_recoded_iv <- glm(race_disclosed ~ likes * toxicity_prob, data=df, family='binomial')
remember(coef(non_recoded_iv), "coef_iv")
remember(diag(vcov(non_recoded_iv)), "se_iv")
remember(xtable(non_recoded_iv),'iv_xtable')
remember(extract(non_recoded_iv,include.aic=F,include.bic=F,include.nobs=F,include.deviance=F,include.loglik=F),'non_recoded_iv')
remember(extract(non_recoded_dv,include.rsquared=F,include.adjrs=F,include.nobs=F),'non_recoded_dv')
tr <- texreg(list(r$non_recoded_iv, r$non_recoded_dv),custom.model.names=c("Example 1","Example 2"),custom.coef.map=list("(Intercept)"="Intercept","race_disclosedTRUE"="Identity Disclosure","toxicity_prob"="Toxicity Score","likes"="Likes","likes:race_disclosedTRUE"="Likes:Identity Disclosure","likes:toxicity_prob"="Likes:Toxicity Score"),single.row=T,dcolumn=T)
print(tr)
remember(tr, 'texregobj')

14
civil_comments/Makefile Normal file
View File

@@ -0,0 +1,14 @@
all: iv_perspective_example.RDS dv_perspective_example.RDS
srun_1core=srun -A comdata -p compute-bigmem --mem=4G --time=12:00:00 -c 1 --pty /usr/bin/bash -l
srun=srun -A comdata -p compute-bigmem --mem=4G --time=12:00:00 --pty /usr/bin/bash -l
perspective_scores.csv: perspective_json_to_csv.sh perspective_results.json
$(srun_1core) ./$^ $@
iv_perspective_example.RDS: 02_iv_example.R perspective_scores.csv
$(srun) Rscript $<
dv_perspective_example.RDS: 01_dv_example.R perspective_scores.csv
$(srun) Rscript $<

View File

@@ -0,0 +1,72 @@
set.seed(1111)
source('load_perspective_data.R')
## how accurate are the classifiers?
## the API claims that these scores are "probabilities"
## say we care about the model of the classification, not the probability
## toxicity error is weakly correlated pearson's R = 0.1 with both "white" and "black".
## compare regressions with "white" or "black" as the outcome and "toxicity_coded" or "toxicity_pred" as a predictor.
## here's a great example with presumambly non-differential error: about what identities is toxicity found humorous?
## a bunch of stars reappear when you used the ground-truth data instead of the predictions.
## pro/con of this example: will have to implement family='poisson'.
## shouldn't be that bad though haha.
cortab['toxicity_error',]
cortab['toxicity_error','funny']
cortab['toxicity_coded',]
cortab['identity_error',]
cortab['white',]
## here's a simple example, is P(white | toxic and mentally ill) > P(white | toxic or mentally ill). Are people who discuss their mental illness in a toxic way more likely to be white compared to those who just talk about their mental illness or are toxic?
summary(glm(white ~ toxicity_coded*psychiatric_or_mental_illness, data = df, family=binomial(link='logit')))
summary(glm(white ~ toxicity_pred*psychiatric_or_mental_illness, data = df, family=binomial(link='logit')))
summary(glm(white ~ toxicity_coded*male, data = df, family=binomial(link='logit')))
summary(glm(white ~ toxicity_pred*male, data = df, family=binomial(link='logit')))
summary(glm(toxicity_coded ~ white*psychiatric_or_mental_illness, data = df, family=binomial(link='logit')))
summary(glm(toxicity_pred ~ white*psychiatric_or_mental_illness, data = df, family=binomial(link='logit')))
## another simple enough example: is P(toxic | funny and white) > P(toxic | funny nand white)? Or, are funny comments more toxic when people disclose that they are white?
summary(glm(toxicity_pred ~ funny*white, data=df, family=binomial(link='logit')))
summary(glm(toxicity_coded ~ funny*white, data=df, family=binomial(link='logit')))
source("../simulations/measerr_methods.R")
saved_model_file <- "measerr_model_tox.eq.funny.cross.white.RDS"
overwrite_model <- TRUE
# it works so far with a 20% and 15% sample. Smaller is better. let's try a 10% sample again. It didn't work out. We'll go forward with a 15% sample.
df_measerr_method <- copy(df)[sample(1:.N, 0.05 * .N), toxicity_coded_1 := toxicity_coded]
df_measerr_method <- df_measerr_method[,toxicity_coded := toxicity_coded_1]
summary(glm(toxicity_coded ~ funny*white, data=df_measerr_method[!is.na(toxicity_coded)], family=binomial(link='logit')))
if(!file.exists(saved_model_file) || (overwrite_model == TRUE)){
measerr_model <- measerr_mle_dv(df_measerr_method,toxicity_coded ~ funny*white,outcome_family=binomial(link='logit'), proxy_formula=toxicity_pred ~ toxicity_coded*funny*white)
saveRDS(measerr_model, saved_model_file)
} else {
measerr_model <- readRDS(saved_model_file)
}
inv_hessian <- solve(measerr_model$hessian)
se <- diag(inv_hessian)
lm2 <- glm.nb(funny ~ (male + female + transgender + other_gender + heterosexual + bisexual + other_sexual_orientation + christian + jewish + hindu + buddhist + atheist + other_religion + asian + latino + other_race_or_ethnicity + physical_disability + intellectual_or_learning_disability + white + black + psychiatric_or_mental_illness)*toxicity_pred, data = df)
m3 <- glm.nb(funny ~ (male + female + transgender + other_gender + heterosexual + bisexual + other_sexual_orientation + christian + jewish + hindu + buddhist + atheist + other_religion + asian + latino + other_race_or_ethnicity + physical_disability + intellectual_or_learning_disability + white + black + psychiatric_or_mental_illness)*toxicity, data = df)
glm(white ~ disagree, data = df, family=binomial(link='logit'))
## example with differential error
glm(white ~ toxicity_coded + toxicity_error, data=df,family=binomial(link='logit'))
glm(toxicity_coded ~ white, data = df, family=binomial(link='logit'))
glm(toxicity_pred ~ white, data = df, family=binomial(link='logit'))

View File

@@ -0,0 +1,136 @@
library(data.table)
library(MASS)
set.seed(1111)
scores <- fread("perspective_scores.csv")
scores <- scores[,id:=as.character(id)]
df <- fread("all_data.csv")
# only use the data that has identity annotations
df <- df[identity_annotator_count > 0]
(df[!(df$id %in% scores$id)])
df <- df[scores,on='id',nomatch=NULL]
df[, ":="(identity_attack_pred = identity_attack_prob >=0.5,
insult_pred = insult_prob >= 0.5,
profanity_pred = profanity_prob >= 0.5,
severe_toxicity_pred = severe_toxicity_prob >= 0.5,
threat_pred = threat_prob >= 0.5,
toxicity_pred = toxicity_prob >= 0.5,
identity_attack_coded = identity_attack >= 0.5,
insult_coded = insult >= 0.5,
profanity_coded = obscene >= 0.5,
severe_toxicity_coded = severe_toxicity >= 0.5,
threat_coded = threat >= 0.5,
toxicity_coded = toxicity >= 0.5
)]
gt.0.5 <- function(v) { v >= 0.5 }
dt.apply.any <- function(fun, ...){apply(apply(cbind(...), 2, fun),1,any)}
df <- df[,":="(gender_disclosed = dt.apply.any(gt.0.5, male, female, transgender, other_gender),
sexuality_disclosed = dt.apply.any(gt.0.5, heterosexual, bisexual, other_sexual_orientation),
religion_disclosed = dt.apply.any(gt.0.5, christian, jewish, hindu, buddhist, atheist, muslim, other_religion),
race_disclosed = dt.apply.any(gt.0.5, white, black, asian, latino, other_race_or_ethnicity),
disability_disclosed = dt.apply.any(gt.0.5,physical_disability, intellectual_or_learning_disability, psychiatric_or_mental_illness, other_disability))]
df <- df[,white:=gt.0.5(white)]
F1 <- function(y, predictions){
tp <- sum( (predictions == y) & (predictions==1))
fn <- sum( (predictions != y) & (predictions!=1))
fp <- sum( (predictions != y) & (predictions==1))
precision <- tp / (tp + fp)
recall <- tp / (tp + fn)
return (2 * precision * recall ) / (precision + recall)
}
## toxicity is about 93% accurate, with an f1 of 0.8
## identity_attack has high accuracy 97%, but an unfortunant f1 of 0.5.
## threat has high accuracy 99%, but a really bad looking f1 of 0.48.
accuracies <- df[,.(identity_attack_acc = mean(identity_attack_pred == identity_attack_coded),
insult_pred_acc = mean(insult_pred == insult_coded),
profanity_acc = mean(profanity_pred == profanity_coded),
severe_toxicity_acc = mean(severe_toxicity_pred == severe_toxicity_coded),
theat_acc = mean(threat_pred == threat_coded),
toxicity_acc = mean(toxicity_pred == toxicity_coded))]
f1s <- df[,.(identity_attack_f1 = F1(identity_attack_coded,identity_attack_pred),
insult_f1 = F1(insult_coded,insult_pred),
profanity_f1 = F1(profanity_coded,profanity_pred),
severe_toxicity_f1 = F1(severe_toxicity_coded,severe_toxicity_pred),
theat_f1 = F1(threat_coded,threat_pred),
toxicity_f1 = F1(toxicity_coded,toxicity_pred))]
positive_cases <- df[,.(identity_attacks = sum(identity_attack_coded),
insults = sum(insult_coded),
profanities = sum(profanity_coded),
severe_toxic_comments = sum(severe_toxicity_coded),
threats = sum(threat_coded),
toxic_comments = sum(toxicity_coded))]
## there are 50,000 toxic comments, 13000 identity attacks, 30000 insults, 3000 profanities, 8 severe toxic, and 1000 threats.
proportions_cases <- df[,.(prop_identity = mean(identity_attack_coded),
prop_insults = mean(insult_coded),
prop_profanity = mean(profanity_coded),
prop_severe = mean(severe_toxicity_coded),
prop_threats = mean(threat_coded),
prop_toxic = mean(toxicity_coded))]
## at 11% of comments, "toxicity" seems not so badly skewed. Try toxicity first, and if it doesn't work out try insults.
## now look for an example where differential error affects an identity, or a reaction.
df <- df[,":="(identity_error = identity_attack_coded - identity_attack_pred,
insult_error = insult_coded - insult_pred,
profanity_error = profanity_coded - profanity_pred,
severe_toxic_error = severe_toxicity_coded - severe_toxicity_pred,
threat_error = threat_coded - threat_pred,
toxicity_error = toxicity_coded - toxicity_pred)]
## what's correlated with toxicity_error ?
df <- df[,approved := rating == "approved"]
df <- df[,white := white > 0.5]
cortab <- cor(df[,.(toxicity_error,
identity_error,
toxicity_coded,
funny,
approved,
sad,
wow,
likes,
disagree,
male,
female,
transgender,
other_gender,
heterosexual,
bisexual,
other_sexual_orientation,
christian,
jewish,
hindu,
buddhist,
atheist,
other_religion,
black,
white,
asian,
latino,
other_race_or_ethnicity,
physical_disability,
intellectual_or_learning_disability,
psychiatric_or_mental_illness,
other_disability,
gender_disclosed,
sexuality_disclosed,
religion_disclosed,
race_disclosed,
disability_disclosed)])

View File

@@ -0,0 +1,2 @@
#!/usr/bin/bash
cat $1 | jq "[.attributeScores.IDENTITY_ATTACK.summaryScore.value, .attributeScores.INSULT.summaryScore.value, .attributeScores.PROFANITY.summaryScore.value,.attributeScores.SEVERE_TOXICITY.summaryScore.value, .attributeScores.THREAT.summaryScore.value, .attributeScores.TOXICITY.summaryScore.value] | @csv" > $2

View File

@@ -0,0 +1,4 @@
#!/usr/bin/bash
header=id,identity_attack_prob,insult_prob,profanity_prob,severe_toxicity_prob,threat_prob,toxicity_prob
echo "$header" > $2
cat $1 | jq -r '[.id, .attributeScores.IDENTITY_ATTACK.summaryScore.value, .attributeScores.INSULT.summaryScore.value, .attributeScores.PROFANITY.summaryScore.value,.attributeScores.SEVERE_TOXICITY.summaryScore.value, .attributeScores.THREAT.summaryScore.value, .attributeScores.TOXICITY.summaryScore.value] | @csv' >> $2

1
overleaf Submodule

Submodule overleaf added at cc89ec76c5

1
paper

Submodule paper deleted from b135cac19e

1
presentation Submodule

Submodule presentation added at ede85ae6c2

View File

@@ -1,291 +0,0 @@
ls()
weight
weight
lablr
labelr
nrow(labelr)
names(labelr)
names(labelr$data)
labelr$data
labelr
names(labelr)
labelr$labelr
labelr$toxic
setwd("..")
q()
n
summary(toxicity_calibrated)
qplot(labelr$toxic,type='hist')
names(labelr)
labelr$n
labelr
names(labelr)
fbyg
gghist(fbyg$weight)
hist(fbyg$weight)
hist(log(fbyg$weight))
fbyg$weight==1
all(fbyg$weight==1)
fbyg$weight[fbyg$weight != 1]
fbyg[fbyg$weight != 1]
fbyg[,fbyg$weight != 1]
fbyg[[fbyg$weight != 1]]
fbyg[fbyg$weight != 1,]
names(labelr)
summary(toxicity_calibrated)
toxicity_calibrated
val.data
names(labelr)
labelr$data
labelr
labelr[data]
labelr[data=='yg']
labelr[,data=='yg']
labelr[data=='yg',]
labelr[labelr$data=='yg']
labelr[,labelr$data=='yg']
labelr[labelr$data=='yg',]
toxicity_calibrated
summary(toxicity_calibrated)
yg3
yg3[,['toxic','toxic_pred']]
yg3 %>% select('toxic','toxic_pred')
yg3 |> select('toxic','toxic_pred')
names(yg3)
yg3[,c('toxic_pred','toxic')]
corr(yg3[,c('toxic_pred','toxic')])
cor(yg3[,c('toxic_pred','toxic')])
cor(yg3[,c('toxic_pred','toxic')],na.rm=T)
cor(yg3[,c('toxic_pred','toxic')],rm.na=T)
?cor(yg3[,c('toxic_pred','toxic')],use=
?cor
cor(yg3[,c('toxic_pred','toxic')],use='all.obs')
?cor
cor(yg3[,c('toxic_pred','toxic')],use='complete.obs')
cor(yg3[,c('toxic_pred','toxic')],use='complete.obs',method='spearman')
?predict
yg3$toxic_pred
names(preds)
preds
preds
preds$error
preds
preds
summary(errormod)
summary(errormod)
summary(preds)
names(preds)
preds
resids
qplot(resids)
resids
?predict.lm
dnorm(1)
dnorm(2)
dnorm(1)
pnorm(1)
preds
p1 + p2
p1 + p2
p1
p2
preds
preds1 <- preds
preds1$diff - preds$diff
preds1$diff
preds1$diff - preds1$diff
preds1$diff - preds$diff
preds1$diff - preds$diff
preds1$diff - preds$diff
preds1$diff - preds$diff
preds1
preds
dnorm(-1)
dnorm(1)
pnorm(1)
pnorm(-1)
pnorm(2)
pnorm(9)
pnorm(6)
pnorm(2)
dnorm(0.95)
qnorm(0.95)
qnorm(0.841)
fulldata_preds
names(yg3)
yg3$toxic_feature_1
yg3$toxic_feature_2
yg3
yg3[,.(toxic_pred,toxic_var)]
yg3[,.(toxic_pred,toxicity_2_pred_sigma,toxicity_1_pred_sigma)]
yg3[,.(toxic_pred,toxicity_2_pred_sigma,toxicity_1_pred_sigma,cov(toxicity_2_pred,toxicity_1_pred))]
cov(1,2)
cov(c(1),c(3))
cov(c(1),c(3,2))
cov(c(1,1),c(3,2))
cov(c(1,2),c(3,2))
covterm
covterm
?cov
covterm
yg3
yg3[,.(toxic_pred,toxicity_2_pred_sigma,toxicity_1_pred_sigma,cov(toxicity_2_pred,toxicity_1_pred))]
yg3[,.(toxic_pred,toxicity_2_pred_sigma,toxicity_1_pred_sigma,toxic_var)]
yg3[,.(toxic_pred,toxicity_2_pred_sigma,toxicity_1_pred_sigma,toxic_var,toxic_sd)]
yg3
names(yg3)
print(sg)
print(sg)
1+1
library(stargazer)
stargazer(w1,w2,w3,w4,w5,t1,t2,t3,t4,t5, type="text",
keep = c("cond1","meantox","cond1:meantox","Constant"),
keep.stat=c("n","adj.rsq"),
model.numbers = F,
dep.var.labels = c("DV = Willingness to comment","DV = Toxicity of YG respondent comments"),
covariate.labels = c("Treatment (top comments shown)",
"Average toxicity of top comments",
"Treatment $\times$ top comments toxicity",
"Constant"),
add.lines = list(c("Article fixed effects","No","No","No","Yes","Yes","No","No","No","Yes","Yes")),
star.cutoffs = c(0.05,0.01,0.005),
notes = "Standard errors are clustered at the respondent level.",
column.labels = c("(1)","(2)","(3)","(4)","(5)","(6)","(7)","(8)","(9)","(10)"),
style = "apsr")
q()
n
yglabels
labelr
names(labelr)
fb
names(fb
)
fb.comment_id
fb['comment_id']
fb[,'comment_id']
labelr[,'comment_id']
names(fb)
fb.labeled
names(fb.labeled)
names(yg)
?amelia
yg
names(yg)
names(yg3)
?rbind
nrow(yg3)
nrow(yg)
yg3[,.(.N),by=.(toxic,fb)]
yg3.toimpute
names(yg3.toimpute)
yg3.toimpute
names(yg3.toimpute)
names(labelr)
nrow(yg3)
nrow(labelr)
?merge.data.table
labelr
is.data.table(labelr)
yg3.toimpute
overimp.grid
overimp.grid
?amelia
q()
n
setwd("presentations/ica_hackathon_2022/")
ls()
attach(r)
example_2_B.plot.df
library(ggplot2)
example_2_B.plot.df[(variable=='x') && (m < 1000)]
example_2_B.plot.df[(variable=='x') && (m < 1000)]
theme_set(theme_default())
theme_set(theme_minimal())
theme_set(theme_classic())
example_2_B.plot.df[(variable=='x') && (m < 1000)]
example_2_B.plot.df[(variable=='x') && (m < 1000),unique(method)]
as.factor
update.packages()
update.packages()
update.packages()
cancel
plot.df
example_2_B.plot.df
plot.df
example_2_B.plot.df
example_2_B.plot.df$method %>% unique
example_2_B.plot.df$method |> unique
example_2_B.plot.df$method |> uniq
unique(example_2_B.plot.df$method)
example_2_B.plot.df$method
example_2_B.plot.df$method
example_2_B.plot.df$method
example_2_B.plot.df$method
example_2_B.plot.df <- r$example_2_B.plot.df
q()
n
setwd("presentations/ica_hackathon_2022/')
setwd("presentations/ica_hackathon_2022/")
example_2_B.plot.df$method
example_2_B.plot.df$method
q()
n
example_2_B.plot.df$method
example_2_B.plot.df$method
q()
n
example_2_B.plot.df$method
example_2_B.plot.df$method
q()
n
q()
n
plot.df
plot.df
plot.df[,.N,by=.(N,m)]
plot.df[,.N,by=.(N,m,method)]
plot.df[variable=='x',.N,by=.(N,m,method)]
plot.df
plot.df[(variable=='x') & (m < 1000) & (!is.na(p.true.in.ci))]
plot.df[(variable=='x') & (m != 1000) & (!is.na(p.true.in.ci))]
plot.df
?label_wrap_gen
install.packages("ggplot2")
devtools::install_github("tidyverse/ggplot2")
2
library(ggplot2)
ggplot2::version
sessioninfo()
sessionInfo()
q()
n
sessionInfo()
?scale_x_discrete
?facet_grid
plot.df
plot.df
plot.df[method="2SLS+gmm"]
plot.df[method=="2SLS+gmm"]
df <- example_2_B.plot.df
df
q()
n
plot.df
plot.df[m=50]
plot.df[m==50]
plot.df.example.2[m==50][method=2SLS+gmm]
plot.df.example.2[m==50][method==2SLS+gmm]
plot.df.example.2[(m==50) & (method==2SLS+gmm)]
plot.df.example.2[(m==50) & (method=="2SLS+gmm")]
plot.df[m==50]
plot.df.example.3
plot.df.example.3
plot.df.example.3[N=25000]
plot.df.example.3[N==25000]
plot.df
plot.df
plot.df
q()
n

View File

@@ -1,26 +0,0 @@
#!/usr/bin/make
all:html pdf
html: $(patsubst %.Rmd,%.html,$(wildcard *.Rmd))
pdf: $(patsubst %.Rmd,%.pdf,$(wildcard *.Rmd))
remembr.RDS:
rsync klone:/gscratch/comdata/users/nathante/ml_measurement_error/mi_simulations/remembr.RDS .
%.pdf: %.html
Rscript -e 'xaringan::decktape("$<","$@",docker=FALSE)'
%.html: %.Rmd *.css remembr.RDS
Rscript -e 'library(rmarkdown); rmarkdown::render("$<", output_file = "$@")'
# firefox "$@"
clean:
rm *.html
rm -r *_files
rm -r *_cache
publish: all pdf
scp -r *.html groc:/home/nathante/public_html/slides/measurement_error_comdatahack_2022.html
scp -r *.pdf groc:/home/nathante/public_html/slides/measurement_error_comdatahack_2022.pdf
.PHONY: clean all

File diff suppressed because one or more lines are too long

View File

@@ -1,724 +0,0 @@
---
title: "How good of a model do you need? Accounting for classification errors in machine assisted content analysis."
author: Nathan TeBlunthuis
date: May 24 2022
template: "../resources/template.html"
output:
xaringan::moon_reader:
lib_dir: libs
seal: false
nature:
highlightStyle: github
ratio: 16:9
countIncrementalSlides: true
slideNumberFormat: |
<div class="progress-bar-container">
<div class="progress-bar" style="width: calc(%current% / %total% * 100%);">
</div>
</div>
self_contained: false
css: [default, my-theme.css, fontawesome.min.css]
chakra: libs/remark-latest.min.js
---
```{r echo=FALSE, warning=FALSE, message=FALSE}
library(knitr)
library(ggplot2)
library(data.table)
library(icons)
f <- function (x) {formatC(x, format="d", big.mark=',')}
theme_set(theme_bw())
r <- readRDS('remembr.RDS')
attach(r)
```
class: center, middle, narrow
<script type='javascript'>
window.MathJax = {
loader: {load: ['[tex]/xcolor']},
tex: {packages: {'[+]': ['xcolor']}}
};
</script>
<div class="my-header"></div>
### .title-heading[Unlocking the power of big data: The importance of measurement error in machine assisted content analysis]
## Nathan TeBlunthuis
<img src="images/nu_logo.png" height="170px" style="padding:21px"/> <img src="images/uw_logo.png" height="170px" style="padding:21px"/> <img src="images/cdsc_logo.png" height="170px" style="padding:21px"/>
`r icons::fontawesome('envelope')` nathan.teblunthuis@northwestern.edu
`r icons::fontawesome('globe')` [https://teblunthuis.cc](https://teblunthuis.cc)
???
This talk will be me presenting my "lab notebook" and not a polished research talk. Maybe it would be a good week of a graduate seminar? In sum, machine assisted content analysis has unique limitations and threats to validity that I wanted to understand better. I've learned how the noise introduced by predictive models can result in misleading statistical inferences, but that a sample of human-labeled validation data can often be used to account for this noise and obtain accurate inferences in the end. Statistical knowledge of this problem and computational tools for addressing it are still in development. My goals for this presentation are to start sharing this information with the community and hopefully to stimulate us to work on extending existing approaches or using them in our work.
This is going to be a boring talk about some *very* technical material. If you're not that interested please return to your hackathon. Please interrupt me if I'm going too fast for you or if you don't understand something. I will try to move quickly in the interests of those wishing to wrap up their hackathon projects. I will also ask you to show hands once or twice, if you are already familiar with some concepts that it might be expedient to skip.
---
class:center, middle, inverse
## Machine assisted content analysis (MACA)
???
I'm going to start by defining a study design that is increasingly common, especially in Communication and Political Science, but also across the social sciences and beyond. I call it *machine assisted content analysis* (MACA).
---
<div class="my-header"></div>
### .border[Machine assisted content analysis (MACA) uses machine learning for scientific measurement.]
.emph[Content analysis:] Statistical analysis of variables measured by human labeling ("coding") of content. This might be simple categorical labels, or maybe more advanced annotations.
--
*Downside:* Human labeling is *a lot* of work.
--
.emph[Machine assisted content analysis:] Use a *predictive algorithm* (often trained on human-made labels) to measure variables for use in a downstream *primary analysis.*
--
*Downside:* Algorithms can be *biased* and *inaccurate* in ways that could invalidate the statistical analysis.
???
A machine assisted content analysis can be part of a more complex or more powerful study design (e.g., an experiment, time series analysis &c).
---
<!-- <div class="my-header"></div> -->
<!-- ### .border[Hypothetical Example: Predicting Racial Harassement in Social Media Comments] -->
---
class:large
<div class="my-header"></div>
### .border[How can MACA go wrong?]
Algorithms can be *biased* and *error prone* (*noisy*).
--
Predictor bias is a potentially difficult problem that requires causal inference methods. I'll focus on *noise* for now.
--
Noise in the predictive model introduces bias in the primary analysis.
--
.indent[We can reduce and sometimes even *eliminate* this bias introduced by noise.]
---
layout:true
<div class="my-header"></div>
### .border[Example 1: An unbiased, but noisy classifier]
.large[.left-column[![](images/example_1_dag.png)]]
???
Please show hands if you are familiar with causal graphs or baysian networks. Should I explain what this diagram means?
---
.right-column[
$x$ is *partly observed* because we have *validation data* $x^*$.
]
---
.right-column[
$x$ is *partly observed* because we have *validation data* $x^*$.
$k$ are the *features* used by the *predictive model* $g(k)$.
]
---
.right-column[
$x$ is *partly observed* because we have *validation data* $x^*$.
$k$ are the *features* used by the *predictive model* $g(k)$.
The predictions $w$ are a *proxy variable* $g(k) = \hat{x} = w$.
]
---
.right-column[
$x$ is *partly observed* because we have *validation data* $x^*$.
$k$ are the *features* used by the *predictive model* $g(k)$.
The predictions $w$ are a *proxy variable* $g(k) = \hat{x} = w$.
$x = w + \xi$ because the predictive model makes errors.
]
---
layout:true
<div class="my-header"></div>
### .border[Noise in a *covariate* creates *attenuation bias*.]
.large[.left-column[![](images/example_1_dag.png)]]
---
.right-column[
We want to estimate, $y = Bx + \varepsilon$, but we estimate $y = Bw + \varepsilon$ instead.
$x = w + \xi$ because the predictive model makes errors.
]
---
.right-column[
We want to estimate, $y = Bx + \varepsilon$, but we estimate $y = Bw + \varepsilon$ instead.
$x = w + \xi$ because the predictive model makes errors.
Assume $g(k)$ is *unbiased* so $E(\xi)=0$. Also assume error is *nondifferential* so $E(\xi y)=0$:
]
---
.right-column[
We want to estimate, $y = Bx + \varepsilon$, but we estimate $y = Bw + \varepsilon$ instead.
$x = w + \xi$ because the predictive model makes errors.
Assume $g(k)$ is *unbiased* so $E(\xi)=0$. Also assume error is *nondifferential* so $E(\xi y)=0$:
$$\widehat{B_w}^{ols}=\frac{\sum^n_{j=j}{(x_j + \xi_j - \overline{(x + \xi)})}(y_j - \bar{y})}{\sum_{j=1}^n{(x_j + \xi_j - \overline{(x+\xi)})^2}} = \frac{\sum^n_{j=j}{(x_j - \bar{x})(y_j -
\bar{y})}}{\sum_{j=1}^n{(x_j + \xi_j - \bar{x}){^2}}}$$
]
---
.right-column[
We want to estimate, $y = Bx + \varepsilon$, but we estimate $y = Bw + \varepsilon$ instead.
$x = w + \xi$ because the predictive model makes errors.
Assume $g(k)$ is *unbiased* so $E(\xi)=0$. Also assume error is *nondifferential* so $E(\xi y)=0$:
$$\widehat{B_w}^{ols}=\frac{\sum^n_{j=j}{(x_j + \xi_j - \overline{(x + \xi)})}(y_j - \bar{y})}{\sum_{j=1}^n{(x_j + \xi_j - \overline{(x+\xi)})^2}} = \frac{\sum^n_{j=j}{(x_j - \bar{x})(y_j -
\bar{y})}}{\sum_{j=1}^n{(x_j + \color{red}{\xi_j} - \bar{x})\color{red}{^2}}}$$
In this scenario, it's clear that $\widehat{B_w}^{ols} < B_x$.
]
???
Please raise your hands if you're familiar with attenuation bias. I expect that it's covered in some graduate stats classes, but not universally.
---
class:large
layout:false
<div class="my-header"></div>
### .border[Beyond attenuation bias]
.larger[Measurement error can threaten validity because:]
- Attenuation bias *spreads* (e.g., to marginal effects as illustrated later).
--
- Measurement error can be *differential*— not distributed evenly and possibly correlated with $x$, $y$, or $\varepsilon$.
--
- *Bias can be away from 0* in GLMs and nonlinear models or if measurement error is differential.
--
- *Confounding* if the *predictive model is biased*, introducing a correlation between the measurement error and the residuals $(E[\xi\varepsilon]\ne 0)$.
---
class:large
layout:false
<div class="my-header"></div>
### .border[Correcting measurement error]
There's a vast literature in statistics on measurement error. Mostly about noise you'd find in sensors. Lots of ideas. No magic bullets.
--
I'm going to briefly cover 3 different approaches: *multiple imputation*, *regression calibration* and *2SLS+GMM*.
--
These all depend on *validation data*. I'm going to ignore where this comes from, but assume it's a random sample of the hypothesis testing dataset.
--
You can *and should* use it to improve your statistical estimates.
---
<div class="my-header"></div>
### .border[Multiple Imputation (MI) treats Measurement Error as a Missing Data Problem]
1. Use validation data to estimate $f(x|w,y)$, a probabilistic model of $x$.
--
2. *Sample* $m$ datasets from $\widehat{f(x|w,y)}$.
--
3. Run your analysis on each of the $m$ datasets.
--
4. Average the results from the $m$ analyses using Rubin's rules.
--
.e[Advantages:] *Very flexible!* Sometimes can work if the predictor $g(k) $ is biased. Good R packages (**`{Amelia}`**, `{mi}`, `{mice}`, `{brms}`).
--
.e[Disadvantages:] Results depend on quality of $\widehat{f(x|w,y)}$; May require more validation data, computationally expensive, statistically inefficient and doesn't seem to benefit much from larger datasets.
---
### .border[Regression calibration directly adjusts for attenuation bias.]
1. Use validation data to estimate the errors $\hat{\xi}$.
--
2. Use $\hat{\xi}$ to correct the OLS estimate.
--
3. Correct the standard errors using MLE or bootstrapping.
--
.e[Advantages:] Simple, fast.
--
.e[Disadvantages:] Limited to OLS models. Requires an unbiased predictor $g(k)$. R support (`{mecor}` R package) is pretty new.
---
layout:true
### .border[2SLS+GMM is designed for this specific problem]
.left-column[![](images/Fong_Taylor.png)]
*Regression calibration with a trick.*
---
.right-column[
1. Estimate $x = w + \xi$ to obtain $\hat{x}$. (First-stage LS).
]
---
.right-column[
1. Estimate $x = w + \xi$ to obtain $\hat{x}$. (First-stage LS).
2. Estimate $y = B^{2sls}\hat{x} + \varepsilon^{2sls}$. (Second-stage LS / regression calibration).
]
---
.right-column[
1. Estimate $x = w + \xi$ to obtain $\hat{x}$. (First-stage LS).
2. Estimate $y = B^{2sls}\hat{x} + \varepsilon^{2sls}$. (Second-stage LS / regression calibration).
3. Estimate $y = B^{val}x^* + \varepsilon^{val}$. (Validation dataset model).
]
---
.right-column[
1. Estimate $x = w + \xi$ to obtain $\hat{x}$. (First-stage LS).
2. Estimate $y = B^{2sls}\hat{x} + \varepsilon^{2sls}$. (Second-stage LS / regression calibration).
3. Estimate $y = B^{val}x^* + \varepsilon^{val}$. (Validation dataset model).
4. Combine $B^{val}$ and $B^{2sls}$ using the generalized method of moments (GMM).
]
---
.right-column[
1. Estimate $x = w + \xi$ to obtain $\hat{x}$. (First-stage LS).
2. Estimate $y = B^{2sls}\hat{x} + \varepsilon^{2sls}$. (Second-stage LS / regression calibration).
3. Estimate $y = B^{val}x^* + \varepsilon^{val}$. (Validation dataset model).
4. Combine $B^{val}$ and $B^{2sls}$ using the generalized method of moments (GMM).
Advantages: Accurate. Sometimes robust if biased predictor $g(k)$ is biased. In theory, flexible to any models that can be fit using GMM.
]
---
.right-column[
1. Estimate $x = w + \xi$ to obtain $\hat{x}$. (First-stage LS).
2. Estimate $y = B^{2sls}\hat{x} + \varepsilon^{2sls}$. (Second-stage LS / regression calibration).
3. Estimate $y = B^{val}x^* + \varepsilon^{val}$. (Validation dataset model).
4. Combine $B^{val}$ and $B^{2sls}$ using the generalized method of moments (GMM).
Advantages: Accurate. Sometimes robust if biased predictor $g(k)$ is biased. In theory, flexible to any models that can be fit using GMM.
Disadvantages: Implementation (`{predictionError}`) is new. API is cumbersome and only supports linear models. Not robust if $E(w\varepsilon) \ne 0$. GMM may be unfamiliar to audiences.
]
---
layout:false
### .border[Testing attenuation bias correction]
<div class="my-header"></div>
I've run simulations to test these approaches in several scenarios.
I simulate random data, fit 100 models and plot the average estimate and its variance.
The model is not very good: about 70% accurate.
Most plausible scenario:
y is continuous and normal-ish.
--
$x$ is binary (human labels) $P(x)=0.5$.
--
$w$ is the *continuous predictor* (e.g., probability) output of $f(x)$ (not binary predictions).
--
if $w$ is binary, most methods struggle, but regression calibration and 2SLS+GMM can do okay.
---
layout:false
### .border[Example 1: estimator of the effect of x]
.right-column[
```{r echo=FALSE, message=FALSE, warning=FALSE, result='asis', dev='svg', fig.width=7.5, fig.asp=.625,cache=F}
# Example 1: average estimate (bar = mean.est +/- var.est/2) of the coefficient
# on x for each correction method, faceted by validation-set size (m, rows)
# and sample size (N, columns).
plot.df <- plot.df.example.1[,':='(method=factor(method,levels=c("Naive","Multiple imputation", "Multiple imputation (Classifier features unobserved)","Regression Calibration","2SLS+gmm","Feasible"),ordered=TRUE),
                                   N=factor(N),
                                   m=factor(m))]
# Keep only the x coefficient, drop the largest design cells, rows missing the
# coverage indicator, and the unused MI variant.
plot.df <- plot.df[(variable=='x') & (m != 1000) & (m!=500) & (N!=10000) & !is.na(p.true.in.ci) & (method!="Multiple imputation (Classifier features unobserved)")]
p <- ggplot(plot.df, aes(y=mean.est, ymax=mean.est + var.est/2, ymin=mean.est-var.est/2, x=method))
p <- p + geom_hline(aes(yintercept=0.2),linetype=2)  # dashed line at the true effect, B_x = 0.2
p <- p + geom_pointrange() + facet_grid(m~N,as.table=FALSE) + scale_x_discrete(labels=label_wrap_gen(4))
print(p)
```
]
.left-column[
All methods work in this scenario
Multiple imputation is inefficient.
]
---
### .border[What about bias?]
.left-column[
.large[![](images/example_2_dag.png)]
]
.right-column[
A few notes on this scenario.
$B_x = 0.2$, $B_g=-0.2$ and $sd(\varepsilon)=3$. So the signal-to-noise ratio is high.
$r$ can be conceived of as a missing feature in the predictive model $g(k)$ that is also correlated with $y$.
For example $r$ might be the *race* of a commentor, $x$ could be *racial harassment*, $y$ whether the commentor gets banned and $k$ only has textual features but human coders can see user profiles to know $r$.
]
---
layout:false
### .border[Example 2: Estimates of the effect of x ]
.center[
```{r echo=FALSE, message=FALSE, warning=FALSE, result='asis', dev='svg', fig.width=8, fig.asp=.625,cache=F}
# Example 2: average estimate (bar = mean.est +/- var.est/2) of the coefficient
# on x under a biased predictor, faceted by m (rows) and N (columns).
plot.df <- plot.df.example.2B[,':='(method=factor(method,levels=c("Naive","Multiple imputation", "Multiple imputation (Classifier features unobserved)","Regression Calibration","2SLS+gmm","Feasible"),ordered=TRUE),
                                    N=factor(N),
                                    m=factor(m))]
# Keep only the x coefficient, drop the largest design cells, rows missing the
# coverage indicator, and the unused MI variant.
plot.df <- plot.df[(variable=='x') & (m != 1000) & (m!=500) & (N!=10000) & !is.na(p.true.in.ci) & (method!="Multiple imputation (Classifier features unobserved)")]
p <- ggplot(plot.df, aes(y=mean.est, ymax=mean.est + var.est/2, ymin=mean.est-var.est/2, x=method))
p <- p + geom_hline(aes(yintercept=0.2),linetype=2)  # dashed line at the true effect, B_x = 0.2
p <- p + geom_pointrange() + facet_grid(m~N,as.table=FALSE) + scale_x_discrete(labels=label_wrap_gen(4))
print(p)
```
]
---
layout:false
### .border[Example 2: Estimates of the effect of r]
.center[
```{r echo=FALSE, message=FALSE, warning=FALSE, result='asis', dev='svg', fig.width=8, fig.asp=.625,cache=F}
# Example 2: average estimate (bar = mean.est +/- var.est/2) of the coefficient
# on g (the biased-predictor term for r), faceted by m (rows) and N (columns).
plot.df <- plot.df.example.2B[,':='(method=factor(method,levels=c("Naive","Multiple imputation", "Multiple imputation (Classifier features unobserved)","Regression Calibration","2SLS+gmm","Feasible"),ordered=TRUE),
                                    N=factor(N),
                                    m=factor(m))]
# Keep only the g coefficient, drop the largest design cells, rows missing the
# coverage indicator, and the unused MI variant.
plot.df <- plot.df[(variable=='g') & (m != 1000) & (m!=500) & (N!=10000) & !is.na(p.true.in.ci) & (method!="Multiple imputation (Classifier features unobserved)")]
p <- ggplot(plot.df, aes(y=mean.est, ymax=mean.est + var.est/2, ymin=mean.est-var.est/2, x=method))
p <- p + geom_hline(aes(yintercept=-0.2),linetype=2)  # dashed line at the true effect, B_g = -0.2
p <- p + geom_pointrange() + facet_grid(m~N,as.table=FALSE) + scale_x_discrete(labels=label_wrap_gen(4))
print(p)
```
]
---
layout:false
class:large
###.border[Takeaways from example 2]
Bias in the predictive model creates bias in hypothesis tests.
--
Bias can be corrected *in this case*.
--
The next scenario has bias that's more tricky.
--
Multiple imputation helps, but doesn't fully correct the bias.
---
layout:false
### .border[When will GMM+2SLS fail?]
.large[.left-column[![](images/example_3_dag.png)]]
.right-column[The catch with GMM:
.emph[Exclusion restriction:] $E[w \varepsilon] = 0$.
The restriction is violated if a variable $U$ causes both $K$ and $Y$ and $X$ causes $K$ (not visa-versa).
]
???
GMM optimizes a model to a system of equations of which the exclusion restriction is one. So if that assumption isn't true it will biased.
This is a different assumption than that of OLS or GLM models.
---
layout:false
### .border[Example 3: Estimates of the effect of x]
.center[
```{r echo=FALSE, message=FALSE, warning=FALSE, result='asis', dev='svg', fig.width=8, fig.asp=.625,cache=F}
# Example 3: average estimate (bar = mean.est +/- var.est/2) of the coefficient
# on x in the exclusion-restriction-violating scenario, faceted by m and N.
# Unlike Examples 1-2, rows with a missing coverage indicator are kept.
plot.df <- plot.df.example.3[,':='(method=factor(method,levels=c("Naive","Multiple imputation", "Multiple imputation (Classifier features unobserved)","Regression Calibration","2SLS+gmm","Feasible"),ordered=TRUE),
                                   N=factor(N),
                                   m=factor(m))]
plot.df <- plot.df[(variable=='x') & (m != 1000) & (m!=500) & (N!=10000) & (method!="Multiple imputation (Classifier features unobserved)")]
p <- ggplot(plot.df, aes(y=mean.est, ymax=mean.est + var.est/2, ymin=mean.est-var.est/2, x=method))
p <- p + geom_hline(aes(yintercept=0.2),linetype=2)  # dashed line at the true effect, B_x = 0.2
p <- p + geom_pointrange() + facet_grid(m~N,as.table=FALSE) + scale_x_discrete(labels=label_wrap_gen(4))
print(p)
```
]
---
### .border[Takeaways]
- Attenuation bias can be a big problem with noisy predictors—leading to small and biased estimates.
- For more general hypothesis tests or if the predictor is biased, measurement error can lead to false discovery.
- It's fixable with validation data—you may not need that much and you should already be getting it.
- This means it can be okay to use poor predictors for hypothesis testing.
- The ecosystem is underdeveloped, but a lot of methods have been researched.
- Take advantage of machine learning + big data and get precise estimates when the signal-to-noise ratio is high!
---
layout:false
### .border[Future work: Noise in the *outcome*]
I've been focusing on noise in *covariates.* What if the predictive algorithm is used to measure the *outcome* $y$?
--
This isn't a problem in the simplest case (linear regression with homoskedastic errors). Noise in $y$ is projected into the error term.
--
Noise in the outcome is still a problem if errors are heteroskedastic and for GLMs / non-linear regression (e.g., logistic regression).
--
Multiple imputation (in theory) could help here. The other method's aren't designed for this case.
--
Solving this problem could be an important methodological contribution with a very broad impact.
---
# .border[Questions?]
Links to slides:[html](https://teblunthuis.cc/~nathante/slides/ecological_adaptation_ica_2022.html) [pdf](https://teblunthuis.cc/~nathante/slides/ecological_adaptation_ica_2022.pdf)
Link to a messy git repository:[https://code.communitydata.science/ml_measurement_error_public.git](https://code.communitydata.science/ml_measurement_error_public.git)
`r icons::fontawesome("envelope")` nathan.teblunthuis@northwestern.edu
`r icons::fontawesome("twitter")` @groceryheist
`r icons::fontawesome("globe")` [https://communitydata.science](https://communitydata.science)
<!-- ### .border[Multiple imputation struggles with discrete variables] -->
<!-- In my experiments I've found that the 2SLS+GMM method works well with a broader range of data types. -->
<!-- To illustrate, Example 3 is the same as Example 2, but with $x$ and $w$ as discrete variables. -->
<!-- Practicallly speaking, a continuous "score" $w$ is often available, and my opinion is that usually this is better + more informative than model predictions in all cases. Continuous validation data may be more difficult to obtain, but it is often possible using techniques like pairwise comparison. -->
<!-- layout:false -->
<!-- ### .border[Example 3: Estimates of the effect of x ] -->
<!-- .center[ -->
<!-- ```{r echo=FALSE, message=FALSE, warning=FALSE, result='asis', dev='svg', fig.width=8, fig.asp=.625,cache=F} -->
<!-- #plot.df <- -->
<!-- plot.df <- plot.df.example.2[,':='(method=factor(method,levels=c("Naive","Multiple imputation", "Multiple imputation (Classifier features unobserved)","Regression Calibration","2SLS+gmm","Feasible"),ordered=T), -->
<!-- N=factor(N), -->
<!-- m=factor(m))] -->
<!-- plot.df <- plot.df[(variable=='x') & (m != 1000) & (m!=500) & (N!=5000) & (N!=10000) & !is.na(p.true.in.ci) & (method!="Multiple imputation (Classifier features unobserved)")] -->
<!-- p <- ggplot(plot.df, aes(y=mean.est, ymax=mean.est + var.est/2, ymin=mean.est-var.est/2, x=method)) -->
<!-- p <- p + geom_hline(aes(yintercept=0.2),linetype=2) -->
<!-- p <- p + geom_pointrange() + facet_grid(m~N,as.table=F) + scale_x_discrete(labels=label_wrap_gen(4)) -->
<!-- print(p) -->
<!-- # get gtable object -->
<!-- .large[.left [![](images/example_2_dag.png)]] -->
<!-- There are at two general ways using a predictive model can introduce bias: *attenuation*, and *confounding.* -->
<!-- Counfounding can be broken down into 4 types: -->
<!-- .right[Confounding on $X$ by observed variables -->
<!-- Confounding on $Y$ by observed variables -->
<!-- ] -->
<!-- .left[Confounding on $X$ by *un*observed variables -->
<!-- Confounding on $Y$ by *un*observed variables -->
<!-- ] -->
<!-- Attenuation and the top-right column can be dealt with relative ease using a few different methods. -->
<!-- The bottom-left column can be addressed, but so far I haven't found a magic bullet. -->
<!-- The left column is pretty much a hopeless situation. -->

View File

@@ -1,757 +0,0 @@
<!DOCTYPE html>
<html lang="" xml:lang="">
<head>
<title>How good of a model do you need? Accounting for classification errors in machine assisted content analysis.</title>
<meta charset="utf-8" />
<meta name="author" content="Nathan TeBlunthuis" />
<script src="libs/header-attrs-2.14/header-attrs.js"></script>
<link href="libs/remark-css-0.0.1/default.css" rel="stylesheet" />
<link rel="stylesheet" href="my-theme.css" type="text/css" />
<link rel="stylesheet" href="fontawesome.min.css" type="text/css" />
</head>
<body>
<textarea id="source">
class: center, middle, narrow
&lt;script type='javascript'&gt;
window.MathJax = {
loader: {load: ['[tex]/xcolor']},
tex: {packages: {'[+]': ['xcolor']}}
};
&lt;/script&gt;
&lt;div class="my-header"&gt;&lt;/div&gt;
### .title-heading[Unlocking the power of big data: The importance of measurement error in machine assisted content analysis]
## Nathan TeBlunthuis
&lt;img src="images/nu_logo.png" height="170px" style="padding:21px"/&gt; &lt;img src="images/uw_logo.png" height="170px" style="padding:21px"/&gt; &lt;img src="images/cdsc_logo.png" height="170px" style="padding:21px"/&gt;
nathan.teblunthuis@northwestern.edu
[https://teblunthuis.cc](https://teblunthuis.cc)
???
This talk will be me presenting my "lab notebook" and not a polished research talk. Maybe it would be a good week of a graduate seminar? In sum, machine assisted content analysis has unique limitations and threats to validity that I wanted to understand better. I've learned how the noise introduced by predictive models can result in misleading statistical inferences, but that a sample of human-labeled validation data can often be used to account for this noise and obtain accurate inferences in the end. Statistical knowledge of this problem and computational tools for addressing are still in development. My goals for this presentation are to start sharing this information with the community and hopeful to stimulate us to work on extending existing approaches or using them in our work.
This is going to be a boring talk about some *very* technical material. If you're not that interested please return to your hackathon. Please interrupt me if I'm going too fast for you or if you don't understand something. I will try to move quickly in the interests of those wishing to wrap up their hackathon projects. I will also ask you to show hands once or twice, if you are already familiar with some concepts that it might be expedient to skip.
---
class:center, middle, inverse
## Machine assisted content analysis (MACA)
???
I'm going to start by defining a study design that is increasingly common, especially in Communication and Political Science, but also across the social sciences and beyond. I call it *machine assisted content analysis* (MACA).
---
&lt;div class="my-header"&gt;&lt;/div&gt;
### .border[Machine assisted content analysis (MACA) uses machine learning for scientific measurement.]
.emph[Content analysis:] Statistical analysis of variables measured by human labeling ("coding") of content. This might be simple categorical labels, or maybe more advanced annotations.
--
*Downside:* Human labeling is *a lot* of work.
--
.emph[Machine assisted content analysis:] Use a *predictive algorithm* (often trained on human-made labels) to measure variables for use in a downstream *primary analysis.*
--
*Downside:* Algorithms can be *biased* and *inaccurate* in ways that could invalidate the statistical analysis.
???
A machine assisted content analysis can be part of a more complex or more powerful study design (e.g., an experiment, time series analysis &amp;c).
---
&lt;!-- &lt;div class="my-header"&gt;&lt;/div&gt; --&gt;
&lt;!-- ### .border[Hypothetical Example: Predicting Racial Harassement in Social Media Comments] --&gt;
---
class:large
&lt;div class="my-header"&gt;&lt;/div&gt;
### .border[How can MACA go wrong?]
Algorithms can be *biased* and *error prone* (*noisy*).
--
Predictor bias is a potentially difficult problem that requires causal inference methods. I'll focus on *noise* for now.
--
Noise in the predictive model introduces bias in the primary analysis.
--
.indent[We can reduce and sometimes even *eliminate* this bias introduced by noise.]
---
layout:true
&lt;div class="my-header"&gt;&lt;/div&gt;
### .border[Example 1: An unbiased, but noisy classifier]
.large[.left-column[![](images/example_1_dag.png)]]
???
Please show hands if you are familiar with causal graphs or baysian networks. Should I explain what this diagram means?
---
.right-column[
`\(x\)` is *partly observed* because we have *validation data* `\(x^*\)`.
]
---
.right-column[
`\(x\)` is *partly observed* because we have *validation data* `\(x^*\)`.
`\(k\)` are the *features* used by the *predictive model* `\(g(k)\)`.
]
---
.right-column[
`\(x\)` is *partly observed* because we have *validation data* `\(x^*\)`.
`\(k\)` are the *features* used by the *predictive model* `\(g(k)\)`.
The predictions `\(w\)` are a *proxy variable* `\(g(k) = \hat{x} = w\)`.
]
---
.right-column[
`\(x\)` is *partly observed* because we have *validation data* `\(x^*\)`.
`\(k\)` are the *features* used by the *predictive model* `\(g(k)\)`.
The predictions `\(w\)` are a *proxy variable* `\(g(k) = \hat{x} = w\)`.
`\(x = w + \xi\)` because the predictive model makes errors.
]
---
layout:true
&lt;div class="my-header"&gt;&lt;/div&gt;
### .border[Noise in a *covariate* creates *attenuation bias*.]
.large[.left-column[![](images/example_1_dag.png)]]
---
.right-column[
We want to estimate, `\(y = Bx + \varepsilon\)`, but we estimate `\(y = Bw + \varepsilon\)` instead.
`\(x = w + \xi\)` because the predictive model makes errors.
]
---
.right-column[
We want to estimate, `\(y = Bx + \varepsilon\)`, but we estimate `\(y = Bw + \varepsilon\)` instead.
`\(x = w + \xi\)` because the predictive model makes errors.
Assume `\(g(k)\)` is *unbiased* so `\(E(\xi)=0\)`. Also assume error is *nondifferential* so `\(E(\xi y)=0\)`:
]
---
.right-column[
We want to estimate, `\(y = Bx + \varepsilon\)`, but we estimate `\(y = Bw + \varepsilon\)` instead.
`\(x = w + \xi\)` because the predictive model makes errors.
Assume `\(g(k)\)` is *unbiased* so `\(E(\xi)=0\)`. Also assume error is *nondifferential* so `\(E(\xi y)=0\)`:
`$$\widehat{B_w}^{ols}=\frac{\sum^n_{j=j}{(x_j + \xi_j - \overline{(x + \xi)})}(y_j - \bar{y})}{\sum_{j=1}^n{(x_j + \xi_j - \overline{(x+\xi)})^2}} = \frac{\sum^n_{j=j}{(x_j - \bar{x})(y_j -
\bar{y})}}{\sum_{j=1}^n{(x_j + \xi_j - \bar{x}){^2}}}$$`
]
---
.right-column[
We want to estimate, `\(y = Bx + \varepsilon\)`, but we estimate `\(y = Bw + \varepsilon\)` instead.
`\(x = w + \xi\)` because the predictive model makes errors.
Assume `\(g(k)\)` is *unbiased* so `\(E(\xi)=0\)`. Also assume error is *nondifferential* so `\(E(\xi y)=0\)`:
`$$\widehat{B_w}^{ols}=\frac{\sum^n_{j=j}{(x_j + \xi_j - \overline{(x + \xi)})}(y_j - \bar{y})}{\sum_{j=1}^n{(x_j + \xi_j - \overline{(x+\xi)})^2}} = \frac{\sum^n_{j=j}{(x_j - \bar{x})(y_j -
\bar{y})}}{\sum_{j=1}^n{(x_j + \color{red}{\xi_j} - \bar{x})\color{red}{^2}}}$$`
In this scenario, it's clear that `\(\widehat{B_w}^{ols} &lt; B_x\)`.
]
???
Please raise your hands if you're familiar with attenuation bias. I expect that its covered in some graduate stats classes, but not universally.
---
class:large
layout:false
&lt;div class="my-header"&gt;&lt;/div&gt;
### .border[Beyond attenuation bias]
.larger[Measurement error can threaten validity because:]
- Attenuation bias *spreads* (e.g., to marginal effects as illustrated later).
--
- Measurement error can be *differential*— not distributed evenly and possible correlated with `\(x\)`, `\(y\)`, or `\(\varepsilon\)`.
--
- *Bias can be away from 0* in GLMs and nonlinear models or if measurement error is differential.
--
- *Confounding* if the *predictive model is biased* introducing a correlation the measurement error and the residuals `\((E[\xi\varepsilon]=0)\)`.
---
class:large
layout:false
&lt;div class="my-header"&gt;&lt;/div&gt;
### .border[Correcting measurement error]
There's a vast literature in statistics on measurement error. Mostly about noise you'd find in sensors. Lots of ideas. No magic bullets.
--
I'm going to briefly cover 3 different approaches: *multiple imputation*, *regression calibration* and *2SLS+GMM*.
--
These all depend on *validation data*. I'm going to ignore where this comes from, but assume it's a random sample of the hypothesis testing dataset.
--
You can *and should* use it to improve your statistical estimates.
---
&lt;div class="my-header"&gt;&lt;/div&gt;
### .border[Multiple Imputation (MI) treats Measurement Error as a Missing Data Problem]
1. Use validation data to estimate `\(f(x|w,y)\)`, a probabilistic model of `\(x\)`.
--
2. *Sample* `\(m\)` datasets from `\(\widehat{f(x|w,y)}\)`.
--
3. Run your analysis on each of the `\(m\)` datasets.
--
4. Average the results from the `\(m\)` analyses using Rubin's rules.
--
.e[Advantages:] *Very flexible!* Sometimes can work if the predictor $g(k) $ is biased. Good R packages (**`{Amelia}`**, `{mi}`, `{mice}`, `{brms}`).
--
.e[Disadvantages:] Results depend on quality of `\(\widehat{f(x|w,y)}\)`; May require more validation data, computationally expensive, statistically inefficient and doesn't seem to benefit much from larger datasets.
---
### .border[Regression calibration directly adjusts for attenuation bias.]
1. Use validation data to estimate the errors `\(\hat{\xi}\)`.
--
2. Use `\(\hat{\xi}\)` to correct the OLS estimate.
--
3. Correct the standard errors using MLE or bootstrapping.
--
.e[Advantages:] Simple, fast.
--
.e[Disadvantages:] Limited to OLS models. Requires an unbiased predictor `\(g(k)\)`. R support (`{mecor}` R package) is pretty new.
---
layout:true
### .border[2SLS+GMM is designed for this specific problem]
.left-column[![](images/Fong_Taylor.png)]
*Regression calibration with a trick.*
---
.right-column[
1. Estimate `\(x = w + \xi\)` to obtain `\(\hat{x}\)`. (First-stage LS).
]
---
.right-column[
1. Estimate `\(x = w + \xi\)` to obtain `\(\hat{x}\)`. (First-stage LS).
2. Estimate `\(y = B^{2sls}\hat{x} + \varepsilon^{2sls}\)`. (Second-stage LS / regression calibration).
]
---
.right-column[
1. Estimate `\(x = w + \xi\)` to obtain `\(\hat{x}\)`. (First-stage LS).
2. Estimate `\(y = B^{2sls}\hat{x} + \varepsilon^{2sls}\)`. (Second-stage LS / regression calibration).
3. Estimate `\(y = B^{val}x^* + \varepsilon^{val}\)`. (Validation dataset model).
]
---
.right-column[
1. Estimate `\(x = w + \xi\)` to obtain `\(\hat{x}\)`. (First-stage LS).
2. Estimate `\(y = B^{2sls}\hat{x} + \varepsilon^{2sls}\)`. (Second-stage LS / regression calibration).
3. Estimate `\(y = B^{val}x^* + \varepsilon^{val}\)`. (Validation dataset model).
4. Combine `\(B^{val}\)` and `\(B^{2sls}\)` using the generalized method of moments (GMM).
]
---
.right-column[
1. Estimate `\(x = w + \xi\)` to obtain `\(\hat{x}\)`. (First-stage LS).
2. Estimate `\(y = B^{2sls}\hat{x} + \varepsilon^{2sls}\)`. (Second-stage LS / regression calibration).
3. Estimate `\(y = B^{val}x^* + \varepsilon^{val}\)`. (Validation dataset model).
4. Combine `\(B^{val}\)` and `\(B^{2sls}\)` using the generalized method of moments (GMM).
Advantages: Accurate. Sometimes robust even if the predictor `\(g(k)\)` is biased. In theory, flexible to any models that can be fit using GMM.
]
---
.right-column[
1. Estimate `\(x = w + \xi\)` to obtain `\(\hat{x}\)`. (First-stage LS).
2. Estimate `\(y = B^{2sls}\hat{x} + \varepsilon^{2sls}\)`. (Second-stage LS / regression calibration).
3. Estimate `\(y = B^{val}x^* + \varepsilon^{val}\)`. (Validation dataset model).
4. Combine `\(B^{val}\)` and `\(B^{2sls}\)` using the generalized method of moments (GMM).
Advantages: Accurate. Sometimes robust even if the predictor `\(g(k)\)` is biased. In theory, flexible to any models that can be fit using GMM.
Disadvantages: Implementation (`{predictionError}`) is new. API is cumbersome and only supports linear models. Not robust if `\(E(w\varepsilon) \ne 0\)`. GMM may be unfamiliar to audiences.
]
---
layout:false
### .border[Testing attenuation bias correction]
&lt;div class="my-header"&gt;&lt;/div&gt;
I've run simulations to test these approaches in several scenarios.
The model is not very good: about 70% accurate.
Most plausible scenario:
y is continuous and normal-ish.
--
`\(x\)` is binary (human labels) `\(P(x)=0.5\)`.
--
`\(w\)` is the *continuous predictor* (e.g., probability) output of `\(f(x)\)` (not binary predictions).
--
If `\(w\)` is binary, most methods struggle, but regression calibration and 2SLS+GMM can do okay.
---
layout:false
### .border[Example 1: estimator of the effect of x]
.right-column[
![](ica_hackathon_2022_files/figure-html/unnamed-chunk-2-1.svg)&lt;!-- --&gt;
]
.left-column[
All methods work in this scenario
Multiple imputation is inefficient.
]
---
### .border[What about bias?]
.left-column[
.large[![](images/example_2_dag.png)]
]
.right-column[
A few notes on this scenario.
`\(B_x = 0.2\)`, `\(B_g=-0.2\)` and `\(sd(\varepsilon)=3\)`. So the signal-to-noise ratio is high.
`\(r\)` can be conceived of as a missing feature in the predictive model `\(g(k)\)` that is also correlated with `\(y\)`.
For example `\(r\)` might be the *race* of a commenter, `\(x\)` could be *racial harassment*, `\(y\)` whether the commenter gets banned and `\(k\)` only has textual features but human coders can see user profiles to know `\(r\)`.
]
---
layout:false
### .border[Example 2: Estimates of the effect of x ]
.center[
![](ica_hackathon_2022_files/figure-html/unnamed-chunk-3-1.svg)&lt;!-- --&gt;
]
---
layout:false
### .border[Example 2: Estimates of the effect of r]
.center[
![](ica_hackathon_2022_files/figure-html/unnamed-chunk-4-1.svg)&lt;!-- --&gt;
]
---
layout:false
class:large
###.border[Takeaways from example 2]
Bias in the predictive model creates bias in hypothesis tests.
--
Bias can be corrected *in this case*.
--
The next scenario has bias that's more tricky.
--
Multiple imputation helps, but doesn't fully correct the bias.
---
layout:false
### .border[When will GMM+2SLS fail?]
.large[.left-column[![](images/example_3_dag.png)]]
.right-column[The catch with GMM:
.emph[Exclusion restriction:] `\(E[w \varepsilon] = 0\)`.
The restriction is violated if a variable `\(U\)` causes both `\(K\)` and `\(Y\)` and `\(X\)` causes `\(K\)` (not vice versa).
]
???
GMM optimizes a model to a system of equations of which the exclusion restriction is one. So if that assumption isn't true the estimate will be biased.
This is a different assumption than that of OLS or GLM models.
---
layout:false
### .border[Example 3: Estimates of the effect of x]
.center[
![](ica_hackathon_2022_files/figure-html/unnamed-chunk-5-1.svg)&lt;!-- --&gt;
]
---
### .border[Takeaways]
- Attenuation bias can be a big problem with noisy predictors—leading to small and biased estimates.
- For more general hypothesis tests or if the predictor is biased, measurement error can lead to false discovery.
- It's fixable with validation data—you may not need that much and you should already be getting it.
- This means it can be okay to use poor predictors for hypothesis testing.
- The ecosystem is underdeveloped, but a lot of methods have been researched.
- Take advantage of machine learning + big data and get precise estimates when the signal-to-noise ratio is high!
---
layout:false
### .border[Future work: Noise in the *outcome*]
I've been focusing on noise in *covariates.* What if the predictive algorithm is used to measure the *outcome* `\(y\)`?
--
This isn't a problem in the simplest case (linear regression with homoskedastic errors). Noise in `\(y\)` is projected into the error term.
--
Noise in the outcome is still a problem if errors are heteroskedastic and for GLMs / non-linear regression (e.g., logistic regression).
--
Multiple imputation (in theory) could help here. The other methods aren't designed for this case.
--
Solving this problem could be an important methodological contribution with a very broad impact.
---
# .border[Questions?]
Links to slides:[html](https://teblunthuis.cc/~nathante/slides/ecological_adaptation_ica_2022.html) [pdf](https://teblunthuis.cc/~nathante/slides/ecological_adaptation_ica_2022.pdf)
Link to a messy git repository:
&lt;i class="fa fa-envelope" aria-hidden='true'&gt;&lt;/i&gt; nathan.teblunthuis@northwestern.edu
&lt;i class="fa fa-twitter" aria-hidden='true'&gt;&lt;/i&gt; @groceryheist
&lt;i class="fa fa-globe" aria-hidden='true'&gt;&lt;/i&gt; [https://communitydata.science](https://communitydata.science)
&lt;!-- ### .border[Multiple imputation struggles with discrete variables] --&gt;
&lt;!-- In my experiments I've found that the 2SLS+GMM method works well with a broader range of data types. --&gt;
&lt;!-- To illustrate, Example 3 is the same as Example 2, but with `\(x\)` and `\(w\)` as discrete variables. --&gt;
&lt;!-- Practically speaking, a continuous "score" `\(w\)` is often available, and my opinion is that usually this is better + more informative than model predictions in all cases. Continuous validation data may be more difficult to obtain, but it is often possible using techniques like pairwise comparison. --&gt;
&lt;!-- layout:false --&gt;
&lt;!-- ### .border[Example 3: Estimates of the effect of x ] --&gt;
&lt;!-- .center[ --&gt;
&lt;!-- ```{r echo=FALSE, message=FALSE, warning=FALSE, result='asis', dev='svg', fig.width=8, fig.asp=.625,cache=F} --&gt;
&lt;!-- #plot.df &lt;- --&gt;
&lt;!-- plot.df &lt;- plot.df.example.2[,':='(method=factor(method,levels=c("Naive","Multiple imputation", "Multiple imputation (Classifier features unobserved)","Regression Calibration","2SLS+gmm","Feasible"),ordered=T), --&gt;
&lt;!-- N=factor(N), --&gt;
&lt;!-- m=factor(m))] --&gt;
&lt;!-- plot.df &lt;- plot.df[(variable=='x') &amp; (m != 1000) &amp; (m!=500) &amp; (N!=5000) &amp; (N!=10000) &amp; !is.na(p.true.in.ci) &amp; (method!="Multiple imputation (Classifier features unobserved)")] --&gt;
&lt;!-- p &lt;- ggplot(plot.df, aes(y=mean.est, ymax=mean.est + var.est/2, ymin=mean.est-var.est/2, x=method)) --&gt;
&lt;!-- p &lt;- p + geom_hline(aes(yintercept=0.2),linetype=2) --&gt;
&lt;!-- p &lt;- p + geom_pointrange() + facet_grid(m~N,as.table=F) + scale_x_discrete(labels=label_wrap_gen(4)) --&gt;
&lt;!-- print(p) --&gt;
&lt;!-- # get gtable object --&gt;
&lt;!-- .large[.left [![](images/example_2_dag.png)]] --&gt;
&lt;!-- There are at two general ways using a predictive model can introduce bias: *attenuation*, and *confounding.* --&gt;
&lt;!-- Counfounding can be broken down into 4 types: --&gt;
&lt;!-- .right[Confounding on `\(X\)` by observed variables --&gt;
&lt;!-- Confounding on `\(Y\)` by observed variables --&gt;
&lt;!-- ] --&gt;
&lt;!-- .left[Confounding on `\(X\)` by *un*observed variables --&gt;
&lt;!-- Confounding on `\(Y\)` by *un*observed variables --&gt;
&lt;!-- ] --&gt;
&lt;!-- Attenuation and the top-right column can be dealt with relative ease using a few different methods. --&gt;
&lt;!-- The bottom-left column can be addressed, but so far I haven't found a magic bullet. --&gt;
&lt;!-- The left column is pretty much a hopeless situation. --&gt;
</textarea>
<style data-target="print-only">@media screen {.remark-slide-container{display:block;}.remark-slide-scaler{box-shadow:none;}}</style>
<script src="libs/remark-latest.min.js"></script>
<script>var slideshow = remark.create({
// Code-highlighting theme applied to fenced code blocks.
"highlightStyle": "github",
// Widescreen slide aspect ratio.
"ratio": "16:9",
// Count each incremental ("--") build as its own slide in the numbering.
"countIncrementalSlides": true,
// Render the slide number as a progress bar along the bottom edge
// (styled by the .progress-bar-container / .progress-bar CSS rules).
"slideNumberFormat": "<div class=\"progress-bar-container\">\n  <div class=\"progress-bar\" style=\"width: calc(%current% / %total% * 100%);\">\n  </div>\n</div>\n"
});
// htmlwidgets size themselves on window resize; fire a synthetic resize after
// each slide change so widgets on the newly shown slide lay out correctly.
if (window.HTMLWidgets) slideshow.on('afterShowSlide', function (slide) {
  window.dispatchEvent(new Event('resize'));
});
// When printing, set the @page size to match the remark slide scaler's
// dimensions so each slide fills exactly one printed page.
(function(d) {
  var s = d.createElement("style"), r = d.querySelector(".remark-slide-scaler");
  // Bail out if remark has not rendered a scaler element yet.
  if (!r) return;
  s.type = "text/css"; s.innerHTML = "@page {size: " + r.style.width + " " + r.style.height +"; }";
  d.head.appendChild(s);
})(document);
// Tag slide containers that are continued on the following slide (incremental
// builds, or slides with count=false) with the 'has-continuation' class, then
// hide those duplicates when printing so the PDF gets one page per finished
// slide rather than one page per build step.
(function(d) {
  var el = d.getElementsByClassName("remark-slides-area");
  if (!el) return;
  var slide, slides = slideshow.getSlides(), els = el[0].children;
  for (var i = 1; i < slides.length; i++) {
    slide = slides[i];
    if (slide.properties.continued === "true" || slide.properties.count === "false") {
      // Mark the PREVIOUS container: it is an unfinished version of slide i.
      els[i - 1].className += ' has-continuation';
    }
  }
  var s = d.createElement("style");
  s.type = "text/css"; s.innerHTML = "@media print { .has-continuation { display: none; } }";
  d.head.appendChild(s);
})(document);
// delete the temporary CSS (for displaying all slides initially) when the user
// starts to view slides
(function() {
  var deleted = false;
  slideshow.on('beforeShowSlide', function(slide) {
    // Only perform the removal once, on the first slide shown.
    if (deleted) return;
    var sheets = document.styleSheets, node;
    for (var i = 0; i < sheets.length; i++) {
      node = sheets[i].ownerNode;
      // Skip every stylesheet except the one tagged data-target="print-only".
      if (node.dataset["target"] !== "print-only") continue;
      node.parentNode.removeChild(node);
    }
    deleted = true;
  });
})();
// add `data-at-shortcutkeys` attribute to <body> to resolve conflicts with JAWS
// screen reader (see PR #262)
(function(d) {
  // Map of single-letter shortcut key -> its description, scraped from the
  // remark help table (key in the first cell, description in the second).
  let res = {};
  d.querySelectorAll('.remark-help-content table tr').forEach(tr => {
    const t = tr.querySelector('td:nth-child(2)').innerText;
    tr.querySelectorAll('td:first-child .key').forEach(key => {
      const k = key.innerText;
      if (/^[a-z]$/.test(k)) res[k] = t; // must be a single letter (key)
    });
  });
  d.body.setAttribute('data-at-shortcutkeys', JSON.stringify(res));
})(document);
(function() {
  "use strict"
  // Replace <script> tags in slides area to make them executable
  // (scripts inserted as markup are inert; cloning their text into freshly
  // created <script> elements makes the browser execute them).
  var scripts = document.querySelectorAll(
    '.remark-slides-area .remark-slide-container script'
  );
  if (!scripts.length) return;
  for (var i = 0; i < scripts.length; i++) {
    var s = document.createElement('script');
    var code = document.createTextNode(scripts[i].textContent);
    s.appendChild(code);
    // Carry over every attribute (src, type, ...) from the inert script.
    var scriptAttrs = scripts[i].attributes;
    for (var j = 0; j < scriptAttrs.length; j++) {
      s.setAttribute(scriptAttrs[j].name, scriptAttrs[j].value);
    }
    scripts[i].parentElement.replaceChild(s, scripts[i]);
  }
})();
(function() {
var links = document.getElementsByTagName('a');
for (var i = 0; i < links.length; i++) {
if (/^(https?:)?\/\//.test(links[i].getAttribute('href'))) {
links[i].target = '_blank';
}
}
})();</script>
<script>
// Unwrap <code> elements whose entire text is a bare LaTeX expression
// (\(...\), \[...\], $$...$$, or \begin{...}...\end{...}) so MathJax can
// typeset them; code inside <pre> blocks and elements with child markup
// are left untouched.
slideshow._releaseMath = function(el) {
  var i, text, code, codes = el.getElementsByTagName('code');
  for (i = 0; i < codes.length;) {
    code = codes[i];
    if (code.parentNode.tagName !== 'PRE' && code.childElementCount === 0) {
      text = code.textContent;
      if (/^\\\((.|\s)+\\\)$/.test(text) || /^\\\[(.|\s)+\\\]$/.test(text) ||
          /^\$\$(.|\s)+\$\$$/.test(text) ||
          /^\\begin\{([^}]+)\}(.|\s)+\\end\{[^}]+\}$/.test(text)) {
        code.outerHTML = code.innerHTML; // remove <code></code>
        // NOTE: rewriting outerHTML shrinks the live `codes` collection,
        // so `i` is deliberately not advanced on this path.
        continue;
      }
    }
    i++;
  }
};
slideshow._releaseMath(document);
</script>
<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
// Inject the MathJax loader at runtime so a self-contained HTML file does
// not need to embed the (large) MathJax sources.
(function () {
  var script = document.createElement('script');
  script.type = 'text/javascript';
  script.src = 'https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-MML-AM_CHTML';
  // Use a protocol-relative URL when served over http(s) so the page never
  // mixes secure and insecure content; keep the explicit protocol for file://.
  if (location.protocol !== 'file:' && /^https?:/.test(script.src))
    script.src = script.src.replace(/^https?:/, '');
  document.getElementsByTagName('head')[0].appendChild(script);
})();
</script>
</body>
</html>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 152 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 310 KiB

View File

@@ -1 +0,0 @@
../../../charts/example_1_dag/example_1_dag.png

View File

@@ -1 +0,0 @@
../../../charts/example_1_dag/example_1_dag.svg

View File

@@ -1 +0,0 @@
../../../charts/example_2_dag/example_2_dag.png

View File

@@ -1 +0,0 @@
../../../charts/example_2_dag/example_2_dag.svg

View File

@@ -1 +0,0 @@
../../../charts/example_3_dag/example_3_dag.png

View File

@@ -1 +0,0 @@
../../../charts/example_3_dag/example_3_dag.svg

Binary file not shown.

Before

Width:  |  Height:  |  Size: 46 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 200 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 24 KiB

View File

@@ -1,12 +0,0 @@
// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
// be compatible with the behavior of Pandoc < 2.8).
document.addEventListener('DOMContentLoaded', function(e) {
  var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
  var i, h, a;
  for (i = 0; i < hs.length; i++) {
    h = hs[i];
    if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6
    a = h.attributes;
    // Strip every attribute from the heading; removing index 0 repeatedly
    // walks the live NamedNodeMap down to empty.
    while (a.length > 0) h.removeAttribute(a[0].name);
  }
});

View File

@@ -1,12 +0,0 @@
// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
// be compatible with the behavior of Pandoc < 2.8).
document.addEventListener('DOMContentLoaded', function(e) {
  var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
  var i, h, a;
  for (i = 0; i < hs.length; i++) {
    h = hs[i];
    if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6
    a = h.attributes;
    // Strip every attribute from the heading; removing index 0 repeatedly
    // walks the live NamedNodeMap down to empty.
    while (a.length > 0) h.removeAttribute(a[0].name);
  }
});

File diff suppressed because one or more lines are too long

View File

@@ -1,72 +0,0 @@
a, a > code {
color: rgb(249, 38, 114);
text-decoration: none;
}
.footnote {
position: absolute;
bottom: 3em;
padding-right: 4em;
font-size: 90%;
}
.remark-code-line-highlighted { background-color: #ffff88; }
.inverse {
background-color: #272822;
color: #d6d6d6;
text-shadow: 0 0 20px #333;
}
.inverse h1, .inverse h2, .inverse h3 {
color: #f3f3f3;
}
/* Two-column layout */
.left-column {
color: #777;
width: 20%;
height: 92%;
float: left;
}
.left-column h2:last-of-type, .left-column h3:last-child {
color: #000;
}
.right-column {
width: 75%;
float: right;
padding-top: 1em;
}
.pull-left {
float: left;
width: 47%;
}
.pull-right {
float: right;
width: 47%;
}
.pull-right + * {
clear: both;
}
img, video, iframe {
max-width: 100%;
}
blockquote {
border-left: solid 5px lightgray;
padding-left: 1em;
}
.remark-slide table {
margin: auto;
border-top: 1px solid #666;
border-bottom: 1px solid #666;
}
.remark-slide table thead th { border-bottom: 1px solid #ddd; }
th, td { padding: 5px; }
.remark-slide thead, .remark-slide tfoot, .remark-slide tr:nth-child(even) { background: #eee }
@page { margin: 0; }
@media print {
.remark-slide-scaler {
width: 100% !important;
height: 100% !important;
transform: scale(1) !important;
top: 0 !important;
left: 0 !important;
}
}

File diff suppressed because one or more lines are too long

View File

@@ -1 +0,0 @@
/home/nathante/mathjax

View File

@@ -1,145 +0,0 @@
.huge { font-size: 170% }
.large { font-size: 140% }
.small { font-size: 70% }
.tiny{font-size: 40%}
/* .inverse { */
/* background-color: #272822; */
/* color: #d6d6d6; */
/* text-shadow: 0 0 20px #333; */
/* } */
.header-image{
width:650px;
display:inline-block;
}
.large img{
width:250px;
}
.emph{
color:#4e2a84;
font-weight: bolder;
}
.mygreen{
color:#2eab20;
}
.myyellow{
color:#AB9d20;
}
.myblue{
color:#2073AB;
}
.myred{
color:#AB202E;
}
.cite{
font-weight: lighter;
font-size:60%;
font-family:"times", "Helvetica","serif";
position: fixed;
bottom: 16px;
}
.left-column {
color: #777;
width: 40%;
height: 100%;
float: left;
}
.left-column h2:last-of-type, .left-column h3:last-child {
color: #000;
}
.right-column {
width: 60%;
float: right;
padding-top: 1em;
}
.hypo-mark img{
width:120px;
position: fixed;
bottom: 545px;
left: 1050px;
}
.hypo-mark-1 img{
}
.hypo-mark-2 img{
bottom:480px;
}
.hypo-mark-3 img{
bottom:480px;
left:1050px;
}
.remark-slide-number {
position: inherit;
}
.remark-slide-number .progress-bar-container {
position: absolute;
bottom: 0;
height: 4px;
display: block;
left: 0;
right: 0;
}
a, a > code{
color:#4e2a84;
text-decoration:none;
}
.remark-slide-number .progress-bar {
height: 100%;
background-color: #4e2a84;
}
.border{
border-bottom: #4e2a84 solid 0.7mm;
padding: 3px;
display:inline-block;
}
div.my-header {
background-color: #4e2a84;
background: -webkit-linear-gradient(left, #604982, #4E2A84 30%, #5820AB 70%, #5820AB);
position: fixed;
top: 0px;
left: 0px;
height: 26px;
width: 100%;
text-align: left;
}
.inverse {
background-color: #322e37;
color: #FCFBFD;
text-shadow: 0 0 20px #333;
}
.inverse h1, .inverse h2, .inverse h3, .inverse h4{
color: #FCFBFD;
text-shadow: 0 0 20px #333;
}
.remark-slide thead, .remark-slide tfoot, .remark-slide tr:nth-child(2n) {
background: #d7c9ec;
}
.narrow{
padding-left: 150px;
padding-right: 150px;
}

View File

@@ -1,793 +0,0 @@
<!DOCTYPE html>
<html$if(lang)$ lang="$lang$"$endif$$if(dir)$ dir="$dir$"$endif$>
<head>
<meta charset="utf-8">
<meta name="generator" content="pandoc">
$for(author-meta)$
<meta name="author" content="$author-meta$" />
$endfor$
$if(date-meta)$
<meta name="dcterms.date" content="$date-meta$" />
$endif$
$if(keywords)$
<meta name="keywords" content="$for(keywords)$$keywords$$sep$, $endfor$">
$endif$
<title>$if(title-prefix)$$title-prefix$ $endif$$pagetitle$</title>
<meta name="apple-mobile-web-app-capable" content="yes">
<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent">
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no, minimal-ui">
<link rel="stylesheet" href="$revealjs-url$/css/reveal.css"/>
$if(highlightjs)$
<link rel="stylesheet"
href="$highlightjs$/$if(highlightjs-theme)$$highlightjs-theme$$else$default$endif$.css"
$if(html5)$$else$type="text/css" $endif$/>
<script src="$highlightjs$/highlight.js"></script>
$endif$
$if(highlighting-css)$
<style type="text/css">
$highlighting-css$
</style>
$endif$
$if(theme)$
<link rel="stylesheet" href="$revealjs-url$/css/theme/$theme$.css" id="theme">
$endif$
$if(theme-dark)$
<style type="text/css">
.reveal section img {
background: rgba(255, 255, 255, 0.85);
}
</style>
$endif$
<!-- some tweaks to reveal css -->
<style type="text/css">
.reveal h1 { font-size: 2.0em; }
.reveal h2 { font-size: 1.5em; }
.reveal h3 { font-size: 1.25em; }
.reveal h4 { font-size: 1em; }
.reveal .slides>section,
.reveal .slides>section>section {
padding: 0px 0px;
}
$if(center)$
$else$
.reveal .title {
margin-top: 125px;
margin-bottom: 50px;
}
$endif$
.reveal table {
border-width: 1px;
border-spacing: 2px;
border-style: dotted;
border-color: gray;
border-collapse: collapse;
font-size: 0.7em;
}
.reveal table th {
border-width: 1px;
padding-left: 10px;
padding-right: 25px;
font-weight: bold;
border-style: dotted;
border-color: gray;
}
.reveal table td {
border-width: 1px;
padding-left: 10px;
padding-right: 25px;
border-style: dotted;
border-color: gray;
}
$if(plugin-menu)$
$if(plugin-chalkboard)$
.reveal .slide-menu-button {
left: 105px !important;
}
$endif$
$endif$
</style>
<style type="text/css">code{white-space: pre;}</style>
$if(css)$
$for(css)$
<link rel="stylesheet" href="$css$"/>
$endfor$
$endif$
<!-- Printing and PDF exports -->
<script id="paper-css" type="application/dynamic-css">
/* Default Print Stylesheet Template
by Rob Glazebrook of CSSnewbie.com
Last Updated: June 4, 2008
Feel free (nay, compelled) to edit, append, and
manipulate this file as you see fit. */
@media print {
/* SECTION 1: Set default width, margin, float, and
background. This prevents elements from extending
beyond the edge of the printed page, and prevents
unnecessary background images from printing */
html {
background: #fff;
width: auto;
height: auto;
overflow: visible;
}
body {
background: #fff;
font-size: 20pt;
width: auto;
height: auto;
border: 0;
margin: 0 5%;
padding: 0;
overflow: visible;
float: none !important;
}
/* SECTION 2: Remove any elements not needed in print.
This would include navigation, ads, sidebars, etc. */
.nestedarrow,
.controls,
.fork-reveal,
.share-reveal,
.state-background,
.reveal .progress,
.reveal .backgrounds {
display: none !important;
}
/* SECTION 3: Set body font face, size, and color.
Consider using a serif font for readability. */
body, p, td, li, div {
font-size: 20pt!important;
font-family: Georgia, "Times New Roman", Times, serif !important;
color: #000;
}
/* SECTION 4: Set heading font face, sizes, and color.
Differentiate your headings from your body text.
Perhaps use a large sans-serif for distinction. */
h1,h2,h3,h4,h5,h6 {
color: #000!important;
height: auto;
line-height: normal;
font-family: Georgia, "Times New Roman", Times, serif !important;
text-shadow: 0 0 0 #000 !important;
text-align: left;
letter-spacing: normal;
}
/* Need to reduce the size of the fonts for printing */
h1 { font-size: 28pt !important; }
h2 { font-size: 24pt !important; }
h3 { font-size: 22pt !important; }
h4 { font-size: 22pt !important; font-variant: small-caps; }
h5 { font-size: 21pt !important; }
h6 { font-size: 20pt !important; font-style: italic; }
/* SECTION 5: Make hyperlinks more usable.
Ensure links are underlined, and consider appending
the URL to the end of the link for usability. */
a:link,
a:visited {
color: #000 !important;
font-weight: bold;
text-decoration: underline;
}
/*
.reveal a:link:after,
.reveal a:visited:after {
content: " (" attr(href) ") ";
color: #222 !important;
font-size: 90%;
}
*/
/* SECTION 6: more reveal.js specific additions by @skypanther */
ul, ol, div, p {
visibility: visible;
position: static;
width: auto;
height: auto;
display: block;
overflow: visible;
margin: 0;
text-align: left !important;
}
.reveal pre,
.reveal table {
margin-left: 0;
margin-right: 0;
}
.reveal pre code {
padding: 20px;
border: 1px solid #ddd;
}
.reveal blockquote {
margin: 20px 0;
}
.reveal .slides {
position: static !important;
width: auto !important;
height: auto !important;
left: 0 !important;
top: 0 !important;
margin-left: 0 !important;
margin-top: 0 !important;
padding: 0 !important;
zoom: 1 !important;
overflow: visible !important;
display: block !important;
text-align: left !important;
-webkit-perspective: none;
-moz-perspective: none;
-ms-perspective: none;
perspective: none;
-webkit-perspective-origin: 50% 50%;
-moz-perspective-origin: 50% 50%;
-ms-perspective-origin: 50% 50%;
perspective-origin: 50% 50%;
}
.reveal .slides section {
visibility: visible !important;
position: static !important;
width: auto !important;
height: auto !important;
display: block !important;
overflow: visible !important;
left: 0 !important;
top: 0 !important;
margin-left: 0 !important;
margin-top: 0 !important;
padding: 60px 20px !important;
z-index: auto !important;
opacity: 1 !important;
page-break-after: always !important;
-webkit-transform-style: flat !important;
-moz-transform-style: flat !important;
-ms-transform-style: flat !important;
transform-style: flat !important;
-webkit-transform: none !important;
-moz-transform: none !important;
-ms-transform: none !important;
transform: none !important;
-webkit-transition: none !important;
-moz-transition: none !important;
-ms-transition: none !important;
transition: none !important;
}
.reveal .slides section.stack {
padding: 0 !important;
}
.reveal section:last-of-type {
page-break-after: avoid !important;
}
.reveal section .fragment {
opacity: 1 !important;
visibility: visible !important;
-webkit-transform: none !important;
-moz-transform: none !important;
-ms-transform: none !important;
transform: none !important;
}
.reveal section img {
display: block;
margin: 15px 0px;
background: rgba(255,255,255,1);
border: 1px solid #666;
box-shadow: none;
}
.reveal section small {
font-size: 0.8em;
}
}
</script>
<script id="pdf-css" type="application/dynamic-css">
/**
* This stylesheet is used to print reveal.js
* presentations to PDF.
*
* https://github.com/hakimel/reveal.js#pdf-export
*/
* {
-webkit-print-color-adjust: exact;
}
body {
margin: 0 auto !important;
border: 0;
padding: 0;
float: none !important;
overflow: visible;
}
html {
width: 100%;
height: 100%;
overflow: visible;
}
/* Remove any elements not needed in print. */
.nestedarrow,
.reveal .controls,
.reveal .progress,
.reveal .playback,
.reveal.overview,
.fork-reveal,
.share-reveal,
.state-background {
display: none !important;
}
h1, h2, h3, h4, h5, h6 {
text-shadow: 0 0 0 #000 !important;
}
.reveal pre code {
overflow: hidden !important;
font-family: Courier, 'Courier New', monospace !important;
}
ul, ol, div, p {
visibility: visible;
position: static;
width: auto;
height: auto;
display: block;
overflow: visible;
margin: auto;
}
.reveal {
width: auto !important;
height: auto !important;
overflow: hidden !important;
}
.reveal .slides {
position: static;
width: 100%;
height: auto;
left: auto;
top: auto;
margin: 0 !important;
padding: 0 !important;
overflow: visible;
display: block;
-webkit-perspective: none;
-moz-perspective: none;
-ms-perspective: none;
perspective: none;
-webkit-perspective-origin: 50% 50%; /* there isn't a none/auto value but 50-50 is the default */
-moz-perspective-origin: 50% 50%;
-ms-perspective-origin: 50% 50%;
perspective-origin: 50% 50%;
}
.reveal .slides section {
page-break-after: always !important;
visibility: visible !important;
position: relative !important;
display: block !important;
position: relative !important;
margin: 0 !important;
padding: 0 !important;
box-sizing: border-box !important;
min-height: 1px;
opacity: 1 !important;
-webkit-transform-style: flat !important;
-moz-transform-style: flat !important;
-ms-transform-style: flat !important;
transform-style: flat !important;
-webkit-transform: none !important;
-moz-transform: none !important;
-ms-transform: none !important;
transform: none !important;
}
.reveal section.stack {
margin: 0 !important;
padding: 0 !important;
page-break-after: avoid !important;
height: auto !important;
min-height: auto !important;
}
.reveal img {
box-shadow: none;
}
.reveal .roll {
overflow: visible;
line-height: 1em;
}
/* Slide backgrounds are placed inside of their slide when exporting to PDF */
.reveal section .slide-background {
display: block !important;
position: absolute;
top: 0;
left: 0;
width: 100%;
z-index: -1;
}
/* All elements should be above the slide-background */
.reveal section>* {
position: relative;
z-index: 1;
}
/* Display slide speaker notes when 'showNotes' is enabled */
.reveal .speaker-notes-pdf {
display: block;
width: 100%;
max-height: none;
left: auto;
top: auto;
z-index: 100;
}
/* Display slide numbers when 'slideNumber' is enabled */
.reveal .slide-number-pdf {
display: block;
position: absolute;
font-size: 14px;
}
</script>
<script>
var style = document.createElement( 'style' );
style.type = 'text/css';
var style_script_id = window.location.search.match( /print-pdf/gi ) ? 'pdf-css' : 'paper-css';
var style_script = document.getElementById(style_script_id).text;
style.innerHTML = style_script;
document.getElementsByTagName('head')[0].appendChild(style);
</script>
$for(header-includes)$
$header-includes$
$endfor$
</head>
<body>
$for(include-before)$
$include-before$
$endfor$
<div class="reveal">
<div class="slides">
<!--
$if(title)$
<section>
<h1 class="title">$title$</h1>
$if(subtitle)$
<h1 class="subtitle">$subtitle$</h1>
$endif$
$for(author)$
<h2 class="author">$author$</h2>
$endfor$
$if(date)$
<h3 class="date">$date$</h3>
$endif$
</section>
$endif$
$if(toc)$
<section id="$idprefix$TOC">
$toc$
</section>
$endif$
-->
$body$
</div>
</div>
<script src="$revealjs-url$/lib/js/head.min.js"></script>
<script src="$revealjs-url$/js/reveal.js"></script>
<script>
// Full list of configuration options available at:
// https://github.com/hakimel/reveal.js#configuration
Reveal.initialize({
$if(controls)$
// Display controls in the bottom right corner
controls: $controls$,
$endif$
$if(progress)$
// Display a presentation progress bar
progress: $progress$,
$endif$
$if(slideNumber)$
// Display the page number of the current slide
slideNumber: $slideNumber$,
$endif$
$if(history)$
// Push each slide change to the browser history
history: $history$,
$endif$
$if(keyboard)$
// Enable keyboard shortcuts for navigation
keyboard: $keyboard$,
$endif$
$if(overview)$
// Enable the slide overview mode
overview: $overview$,
$endif$
$if(center)$
// Vertical centering of slides
center: $center$,
$endif$
$if(touch)$
// Enables touch navigation on devices with touch input
touch: $touch$,
$endif$
$if(loop)$
// Loop the presentation
loop: $loop$,
$endif$
$if(rtl)$
// Change the presentation direction to be RTL
rtl: $rtl$,
$endif$
$if(fragments)$
// Turns fragments on and off globally
fragments: $fragments$,
$endif$
$if(embedded)$
// Flags if the presentation is running in an embedded mode,
// i.e. contained within a limited portion of the screen
embedded: $embedded$,
$endif$
$if(help)$
// Flags if we should show a help overlay when the questionmark
// key is pressed
help: $help$,
$endif$
$if(autoSlide)$
// Number of milliseconds between automatically proceeding to the
// next slide, disabled when set to 0, this value can be overwritten
// by using a data-autoslide attribute on your slides
autoSlide: $autoSlide$,
$endif$
$if(autoSlideStoppable)$
// Stop auto-sliding after user input
autoSlideStoppable: $autoSlideStoppable$,
$endif$
$if(mouseWheel)$
// Enable slide navigation via mouse wheel
mouseWheel: $mouseWheel$,
$endif$
$if(hideAddressBar)$
// Hides the address bar on mobile devices
hideAddressBar: $hideAddressBar$,
$endif$
$if(previewLinks)$
// Opens links in an iframe preview overlay
previewLinks: $previewLinks$,
$endif$
$if(transition)$
// Transition style
transition: '$transition$', // none/fade/slide/convex/concave/zoom
$endif$
$if(transitionSpeed)$
// Transition speed
transitionSpeed: '$transitionSpeed$', // default/fast/slow
$endif$
$if(backgroundTransition)$
// Transition style for full page slide backgrounds
backgroundTransition: '$backgroundTransition$', // none/fade/slide/convex/concave/zoom
$endif$
$if(viewDistance)$
// Number of slides away from the current that are visible
viewDistance: $viewDistance$,
$endif$
$if(parallaxBackgroundImage)$
// Parallax background image
parallaxBackgroundImage: '$parallaxBackgroundImage$', // e.g. "'https://s3.amazonaws.com/hakim-static/reveal-js/reveal-parallax-1.jpg'"
$endif$
$if(parallaxBackgroundSize)$
// Parallax background size
parallaxBackgroundSize: '$parallaxBackgroundSize$', // CSS syntax, e.g. "2100px 900px"
$endif$
$if(parallaxBackgroundHorizontal)$
// Amount to move parallax background (horizontal and vertical) on slide change
// Number, e.g. 100
parallaxBackgroundHorizontal: '$parallaxBackgroundHorizontal$',
$endif$
$if(parallaxBackgroundVertical)$
parallaxBackgroundVertical: '$parallaxBackgroundVertical$',
$endif$
$if(width)$
// The "normal" size of the presentation, aspect ratio will be preserved
// when the presentation is scaled to fit different resolutions. Can be
// specified using percentage units.
width: $width$,
$endif$
$if(height)$
height: $height$,
$endif$
$if(margin)$
// Factor of the display size that should remain empty around the content
margin: $margin$,
$endif$
$if(minScale)$
// Bounds for smallest/largest possible scale to apply to content
minScale: $minScale$,
$endif$
$if(maxScale)$
maxScale: $maxScale$,
$endif$
$if(plugin-menu)$
menu: {
$if(menu-side)$
side: $menu-side$,
$endif$
$if(menu-numbers)$
numbers: $menu-numbers$,
$endif$
$if(menu-titleSelector)$
titleSelector: $menu-titleSelector$,
$endif$
$if(menu-hideMissingTitles)$
hideMissingTitles: $menu-hideMissingTitles$,
$endif$
$if(menu-markers)$
markers: $menu-markers$,
$endif$
$if(menu-openButton)$
openButton: $menu-openButton$,
$endif$
$if(menu-openSlideNumber)$
openSlideNumber: $menu-openSlideNumber$,
$endif$
$if(menu-keyboard)$
keyboard: $menu-keyboard$,
$endif$
custom: false,
themes: false,
transitions: false
},
$endif$
$if(plugin-chalkboard)$
chalkboard: {
$if(chalkboard-src)$
src: $chalkboard-src$,
$endif$
$if(chalkboard-readOnly)$
readOnly: $chalkboard-readOnly$,
$endif$
$if(chalkboard-toggleNotesButton)$
toggleNotesButton: $chalkboard-toggleNotesButton$,
$endif$
$if(chalkboard-toggleChalkboardButton)$
toggleChalkboardButton: $chalkboard-toggleChalkboardButton$,
$endif$
$if(chalkboard-transition)$
transition: $chalkboard-transition$,
$endif$
$if(chalkboard-theme)$
theme: $chalkboard-theme$,
$endif$
$if(chalkboard-color)$
color: $chalkboard-color$,
$endif$
$if(chalkboard-background)$
background: $chalkboard-background$,
$endif$
$if(chalkboard-pen)$
pen: $chalkboard-pen$,
$endif$
},
keyboard: {
67: function() { RevealChalkboard.toggleNotesCanvas() }, // toggle notes canvas when 'c' is pressed
66: function() { RevealChalkboard.toggleChalkboard() }, // toggle chalkboard when 'b' is pressed
46: function() { RevealChalkboard.clear() }, // clear chalkboard when 'DEL' is pressed
8: function() { RevealChalkboard.reset() }, // reset chalkboard data on current slide when 'BACKSPACE' is pressed
68: function() { RevealChalkboard.download() }, // download recorded chalkboard drawing when 'd' is pressed
},
$endif$
// Optional reveal.js plugins
dependencies: [
$if(plugin-notes)$
{ src: '$revealjs-url$/plugin/notes/notes.js', async: true },
$endif$
$if(plugin-search)$
{ src: '$revealjs-url$/plugin/search/search.js', async: true },
$endif$
$if(plugin-zoom)$
{ src: '$revealjs-url$/plugin/zoom-js/zoom.js', async: true },
$endif$
$if(plugin-chalkboard)$
{ src: '$revealjs-url$/plugin/chalkboard/chalkboard.js', async: true },
$endif$
$if(plugin-menu)$
{ src: '$revealjs-url$/plugin/menu/menu.js', async: true },
$endif$
]
});
</script>
$if(mathjax-url)$
<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
  (function () {
    // Inject the MathJax loader at runtime so the document stays valid
    // when pandoc produces a self-contained file.
    var mathjaxScript = document.createElement("script");
    mathjaxScript.type = "text/javascript";
    mathjaxScript.src = "$mathjax-url$";
    document.getElementsByTagName("head")[0].appendChild(mathjaxScript);
  })();
</script>
$endif$
<script>
  (function() {
    // When jQuery is present, mirror reveal.js slide changes as jQuery
    // 'hidden'/'shown' events so embedded widgets can react to them.
    if (!window.jQuery) {
      return;
    }
    Reveal.addEventListener('slidechanged', function(event) {
      window.jQuery(event.previousSlide).trigger('hidden');
      window.jQuery(event.currentSlide).trigger('shown');
    });
  })();
</script>
$for(include-after)$
$include-after$
$endfor$
</body>
</html>

View File

@@ -30,11 +30,11 @@ source("simulation_base.R")
#### how much power do we get from the model in the first place? (sweeping N and m) #### how much power do we get from the model in the first place? (sweeping N and m)
#### ####
simulate_data <- function(N, m, B0=0, Bxy=0.2, Bzy=-0.2, Bzx=0.2, y_explained_variance=0.025, prediction_accuracy=0.73, seed=1){ simulate_data <- function(N, m, B0=0, Bxy=0.2, Bzy=-0.2, Bzx=0.2, y_explained_variance=0.025, prediction_accuracy=0.73, Px=0.5, seed=1){
set.seed(seed) set.seed(seed)
z <- rnorm(N,sd=0.5) z <- rnorm(N,sd=0.5)
# x.var.epsilon <- var(Bzx *z) * ((1-zx_explained_variance)/zx_explained_variance) # x.var.epsilon <- var(Bzx *z) * ((1-zx_explained_variance)/zx_explained_variance)
xprime <- Bzx * z #+ x.var.epsilon xprime <- Bzx * z + qlogis(Px)
x <- rbinom(N,1,plogis(xprime)) x <- rbinom(N,1,plogis(xprime))
y.var.epsilon <- (var(Bzy * z) + var(Bxy *x) + 2*cov(Bxy*x,Bzy*z)) * ((1-y_explained_variance)/y_explained_variance) y.var.epsilon <- (var(Bzy * z) + var(Bxy *x) + 2*cov(Bxy*x,Bzy*z)) * ((1-y_explained_variance)/y_explained_variance)
@@ -78,18 +78,21 @@ parser <- add_argument(parser, "--truth_formula", help='formula for the true var
parser <- add_argument(parser, "--Bzx", help='Effect of z on x', default=0.3) parser <- add_argument(parser, "--Bzx", help='Effect of z on x', default=0.3)
parser <- add_argument(parser, "--Bzy", help='Effect of z on y', default=-0.3) parser <- add_argument(parser, "--Bzy", help='Effect of z on y', default=-0.3)
parser <- add_argument(parser, "--Bxy", help='Effect of x on y', default=0.3) parser <- add_argument(parser, "--Bxy", help='Effect of x on y', default=0.3)
parser <- add_argument(parser, "--Px", help='Base rate of x', default=0.5)
parser <- add_argument(parser, "--confint_method", help='method for approximating confidence intervals', default='quad')
args <- parse_args(parser) args <- parse_args(parser)
B0 <- 0 B0 <- 0
Px <- args$Px
Bxy <- args$Bxy Bxy <- args$Bxy
Bzy <- args$Bzy Bzy <- args$Bzy
Bzx <- args$Bzx Bzx <- args$Bzx
df <- simulate_data(args$N, args$m, B0, Bxy, Bzy, Bzx, seed=args$seed + 500, y_explained_variance = args$y_explained_variance, prediction_accuracy=args$prediction_accuracy) df <- simulate_data(args$N, args$m, B0, Bxy, Bzy, Bzx, Px, seed=args$seed + 500, y_explained_variance = args$y_explained_variance, prediction_accuracy=args$prediction_accuracy)
result <- list('N'=args$N,'m'=args$m,'B0'=B0,'Bxy'=Bxy, Bzx=Bzx, 'Bzy'=Bzy, 'seed'=args$seed, 'y_explained_variance'=args$y_explained_variance, 'prediction_accuracy'=args$prediction_accuracy, 'accuracy_imbalance_difference'=args$accuracy_imbalance_difference, 'outcome_formula'=args$outcome_formula, 'proxy_formula'=args$proxy_formula,truth_formula=args$truth_formula, error='') result <- list('N'=args$N,'m'=args$m,'B0'=B0,'Bxy'=Bxy, 'Bzx'=Bzx, 'Bzy'=Bzy, 'Px'=Px, 'seed'=args$seed, 'y_explained_variance'=args$y_explained_variance, 'prediction_accuracy'=args$prediction_accuracy, 'accuracy_imbalance_difference'=args$accuracy_imbalance_difference, 'outcome_formula'=args$outcome_formula, 'proxy_formula'=args$proxy_formula,truth_formula=args$truth_formula, confint_method=args$confint_method,error='')
outline <- run_simulation(df, result, outcome_formula=as.formula(args$outcome_formula), proxy_formula=as.formula(args$proxy_formula), truth_formula=as.formula(args$truth_formula)) outline <- run_simulation(df, result, outcome_formula=as.formula(args$outcome_formula), proxy_formula=as.formula(args$proxy_formula), truth_formula=as.formula(args$truth_formula),confint_method=args$confint_method)
outfile_lock <- lock(paste0(args$outfile, '_lock'),exclusive=TRUE) outfile_lock <- lock(paste0(args$outfile, '_lock'),exclusive=TRUE)
if(file.exists(args$outfile)){ if(file.exists(args$outfile)){

View File

@@ -31,11 +31,11 @@ source("simulation_base.R")
## one way to do it is by adding correlation to x.obs and y that isn't in w. ## one way to do it is by adding correlation to x.obs and y that isn't in w.
## in other words, the model is missing an important feature of x.obs that's related to y. ## in other words, the model is missing an important feature of x.obs that's related to y.
simulate_data <- function(N, m, B0, Bxy, Bzx, Bzy, seed, y_explained_variance=0.025, prediction_accuracy=0.73, y_bias=-0.8,z_bias=0,accuracy_imbalance_difference=0.3){ simulate_data <- function(N, m, B0, Bxy, Bzx, Bzy, seed, y_explained_variance=0.025, prediction_accuracy=0.73, y_bias=-0.8,z_bias=0,Px=0.5,accuracy_imbalance_difference=0.3){
set.seed(seed) set.seed(seed)
# make w and y dependent # make w and y dependent
z <- rnorm(N,sd=0.5) z <- rnorm(N,sd=0.5)
x <- rbinom(N, 1, plogis(Bzx * z)) x <- rbinom(N, 1, plogis(Bzx * z + qlogis(Px)))
y.var.epsilon <- (var(Bzy * z) + var(Bxy *x) + 2*cov(Bzy*z,Bxy*x)) * ((1-y_explained_variance)/y_explained_variance) y.var.epsilon <- (var(Bzy * z) + var(Bxy *x) + 2*cov(Bzy*z,Bxy*x)) * ((1-y_explained_variance)/y_explained_variance)
y.epsilon <- rnorm(N, sd = sqrt(y.var.epsilon)) y.epsilon <- rnorm(N, sd = sqrt(y.var.epsilon))
@@ -104,9 +104,10 @@ simulate_data <- function(N, m, B0, Bxy, Bzx, Bzy, seed, y_explained_variance=0.
## print(mean(df[z==1]$x == df[z==1]$w_pred)) ## print(mean(df[z==1]$x == df[z==1]$w_pred))
## print(mean(df$w_pred == df$x)) ## print(mean(df$w_pred == df$x))
resids <- resid(lm(y~x + z)) resids <- resid(lm(y~x + z))
odds.x1 <- qlogis(prediction_accuracy) + y_bias*qlogis(pnorm(resids[x==1])) + z_bias * qlogis(pnorm(z,sd(z))) odds.x1 <- qlogis(prediction_accuracy) + y_bias*qlogis(pnorm(resids[x==1])) + z_bias * qlogis(pnorm(z[x==1],sd(z)))
odds.x0 <- qlogis(prediction_accuracy,lower.tail=F) + y_bias*qlogis(pnorm(resids[x==0])) + z_bias * qlogis(pnorm(z,sd(z))) odds.x0 <- qlogis(prediction_accuracy,lower.tail=F) + y_bias*qlogis(pnorm(resids[x==0])) + z_bias * qlogis(pnorm(z[x==0],sd(z)))
## acc.x0 <- p.correct[df[,x==0]] ## acc.x0 <- p.correct[df[,x==0]]
## acc.x1 <- p.correct[df[,x==1]] ## acc.x1 <- p.correct[df[,x==1]]
@@ -139,10 +140,12 @@ parser <- add_argument(parser, "--proxy_formula", help='formula for the proxy va
parser <- add_argument(parser, "--y_bias", help='coefficient of y on the probability a classification is correct', default=-0.5) parser <- add_argument(parser, "--y_bias", help='coefficient of y on the probability a classification is correct', default=-0.5)
parser <- add_argument(parser, "--z_bias", help='coefficient of z on the probability a classification is correct', default=0) parser <- add_argument(parser, "--z_bias", help='coefficient of z on the probability a classification is correct', default=0)
parser <- add_argument(parser, "--truth_formula", help='formula for the true variable', default="x~z") parser <- add_argument(parser, "--truth_formula", help='formula for the true variable', default="x~z")
parser <- add_argument(parser, "--Px", help='base rate of x', default=0.5)
parser <- add_argument(parser, "--confint_method", help='method for approximating confidence intervals', default='quad')
args <- parse_args(parser) args <- parse_args(parser)
B0 <- 0 B0 <- 0
Px <- args$Px
Bxy <- args$Bxy Bxy <- args$Bxy
Bzy <- args$Bzy Bzy <- args$Bzy
Bzx <- args$Bzx Bzx <- args$Bzx
@@ -156,9 +159,9 @@ if(args$m < args$N){
## pc.df <- pc(suffStat=list(C=cor(df.pc),n=nrow(df.pc)),indepTest=gaussCItest,labels=names(df.pc),alpha=0.05) ## pc.df <- pc(suffStat=list(C=cor(df.pc),n=nrow(df.pc)),indepTest=gaussCItest,labels=names(df.pc),alpha=0.05)
## plot(pc.df) ## plot(pc.df)
result <- list('N'=args$N,'m'=args$m,'B0'=B0,'Bxy'=Bxy, Bzx=args$Bzx, 'Bzy'=Bzy, 'seed'=args$seed, 'y_explained_variance'=args$y_explained_variance, 'prediction_accuracy'=args$prediction_accuracy, 'accuracy_imbalance_difference'=args$accuracy_imbalance_difference, 'y_bias'=args$y_bias,'outcome_formula'=args$outcome_formula, 'proxy_formula'=args$proxy_formula,truth_formula=args$truth_formula, error='') result <- list('N'=args$N,'m'=args$m,'B0'=B0,'Bxy'=Bxy, 'Bzx'=args$Bzx, 'Bzy'=Bzy, 'Px'=Px, 'seed'=args$seed, 'y_explained_variance'=args$y_explained_variance, 'prediction_accuracy'=args$prediction_accuracy, 'accuracy_imbalance_difference'=args$accuracy_imbalance_difference, 'y_bias'=args$y_bias,'outcome_formula'=args$outcome_formula, 'proxy_formula'=args$proxy_formula,truth_formula=args$truth_formula, confint_method=args$confint_method, error='')
outline <- run_simulation(df, result, outcome_formula=as.formula(args$outcome_formula), proxy_formula=as.formula(args$proxy_formula), truth_formula=as.formula(args$truth_formula)) outline <- run_simulation(df, result, outcome_formula=as.formula(args$outcome_formula), proxy_formula=as.formula(args$proxy_formula), truth_formula=as.formula(args$truth_formula),confint_method=args$confint_method)
outfile_lock <- lock(paste0(args$outfile, '_lock'),exclusive=TRUE) outfile_lock <- lock(paste0(args$outfile, '_lock'),exclusive=TRUE)

View File

@@ -73,15 +73,17 @@ parser <- add_argument(parser, "--y_explained_variance", help='what proportion o
parser <- add_argument(parser, "--prediction_accuracy", help='how accurate is the predictive model?', default=0.72) parser <- add_argument(parser, "--prediction_accuracy", help='how accurate is the predictive model?', default=0.72)
## parser <- add_argument(parser, "--x_bias_y1", help='how is the classifier biased when y = 1?', default=-0.75) ## parser <- add_argument(parser, "--x_bias_y1", help='how is the classifier biased when y = 1?', default=-0.75)
## parser <- add_argument(parser, "--x_bias_y0", help='how is the classifier biased when y = 0 ?', default=0.75) ## parser <- add_argument(parser, "--x_bias_y0", help='how is the classifier biased when y = 0 ?', default=0.75)
parser <- add_argument(parser, "--Bxy", help='coefficient of x on y', default=0.01) parser <- add_argument(parser, "--Bxy", help='coefficient of x on y', default=0.3)
parser <- add_argument(parser, "--Bzy", help='coeffficient of z on y', default=-0.01) parser <- add_argument(parser, "--Bzy", help='coeffficient of z on y', default=-0.3)
parser <- add_argument(parser, "--Bzx", help='coeffficient of z on x', default=-0.5) parser <- add_argument(parser, "--Bzx", help='coeffficient of z on x', default=-0.5)
parser <- add_argument(parser, "--B0", help='Base rate of y', default=0.5)
parser <- add_argument(parser, "--outcome_formula", help='formula for the outcome variable', default="y~x+z") parser <- add_argument(parser, "--outcome_formula", help='formula for the outcome variable', default="y~x+z")
parser <- add_argument(parser, "--proxy_formula", help='formula for the proxy variable', default="w_pred~y") parser <- add_argument(parser, "--proxy_formula", help='formula for the proxy variable', default="w_pred~y")
parser <- add_argument(parser, "--confint_method", help='method for getting confidence intervals', default="quad")
args <- parse_args(parser) args <- parse_args(parser)
B0 <- 0 B0 <- args$B0
Bxy <- args$Bxy Bxy <- args$Bxy
Bzy <- args$Bzy Bzy <- args$Bzy
Bzx <- args$Bzx Bzx <- args$Bzx
@@ -90,9 +92,9 @@ if(args$m < args$N){
df <- simulate_data(args$N, args$m, B0, Bxy, Bzy, Bzx, args$seed, args$prediction_accuracy) df <- simulate_data(args$N, args$m, B0, Bxy, Bzy, Bzx, args$seed, args$prediction_accuracy)
# result <- list('N'=args$N,'m'=args$m,'B0'=B0,'Bxy'=Bxy,'Bzy'=Bzy, 'seed'=args$seed, 'y_explained_variance'=args$y_explained_variance, 'prediction_accuracy'=args$prediction_accuracy, 'x_bias_y0'=args$x_bias_y0,'x_bias_y1'=args$x_bias_y1,'outcome_formula' = args$outcome_formula, 'proxy_formula' = args$proxy_formula) # result <- list('N'=args$N,'m'=args$m,'B0'=B0,'Bxy'=Bxy,'Bzy'=Bzy, 'seed'=args$seed, 'y_explained_variance'=args$y_explained_variance, 'prediction_accuracy'=args$prediction_accuracy, 'x_bias_y0'=args$x_bias_y0,'x_bias_y1'=args$x_bias_y1,'outcome_formula' = args$outcome_formula, 'proxy_formula' = args$proxy_formula)
result <- list('N'=args$N,'m'=args$m,'B0'=B0,'Bxy'=Bxy,'Bzy'=Bzy, 'Bzx'=Bzx,'seed'=args$seed, 'y_explained_variance'=args$y_explained_variance, 'prediction_accuracy'=args$prediction_accuracy, 'outcome_formula' = args$outcome_formula, 'proxy_formula' = args$proxy_formula) result <- list('N'=args$N,'m'=args$m,'B0'=B0,'Bxy'=Bxy,'Bzy'=Bzy, 'Bzx'=Bzx,'seed'=args$seed, 'y_explained_variance'=args$y_explained_variance, 'prediction_accuracy'=args$prediction_accuracy, 'outcome_formula' = args$outcome_formula, 'proxy_formula' = args$proxy_formula, 'confint_method'=args$confint_method)
outline <- run_simulation_depvar(df, result, outcome_formula = as.formula(args$outcome_formula), proxy_formula = as.formula(args$proxy_formula)) outline <- run_simulation_depvar(df, result, outcome_formula = as.formula(args$outcome_formula), proxy_formula = as.formula(args$proxy_formula), confint_method=args$confint_method)
outfile_lock <- lock(paste0(args$outfile, '_lock'),exclusive=TRUE) outfile_lock <- lock(paste0(args$outfile, '_lock'),exclusive=TRUE)

View File

@@ -31,14 +31,14 @@ source("simulation_base.R")
## one way to do it is by adding correlation to x.obs and y that isn't in w. ## one way to do it is by adding correlation to x.obs and y that isn't in w.
## in other words, the model is missing an important feature of x.obs that's related to y. ## in other words, the model is missing an important feature of x.obs that's related to y.
simulate_data <- function(N, m, B0, Bxy, Bzy, seed, prediction_accuracy=0.73, x_bias=-0.75){ simulate_data <- function(N, m, B0, Bxy, Bzy, Py, seed, prediction_accuracy=0.73, x_bias=-0.75){
set.seed(seed) set.seed(seed)
# make w and y dependent # make w and y dependent
z <- rbinom(N, 1, 0.5) z <- rbinom(N, 1, 0.5)
x <- rbinom(N, 1, 0.5) x <- rbinom(N, 1, 0.5)
ystar <- Bzy * z + Bxy * x + B0 ystar <- Bzy * z + Bxy * x + B0 + qlogix(Py)
y <- rbinom(N,1,plogis(ystar)) y <- rbinom(N,1,plogis(ystar))
# glm(y ~ x + z, family="binomial") # glm(y ~ x + z, family="binomial")
@@ -77,6 +77,7 @@ parser <- add_argument(parser, "--prediction_accuracy", help='how accurate is th
parser <- add_argument(parser, "--x_bias", help='how is the classifier biased?', default=0.75) parser <- add_argument(parser, "--x_bias", help='how is the classifier biased?', default=0.75)
parser <- add_argument(parser, "--Bxy", help='coefficient of x on y', default=0.3) parser <- add_argument(parser, "--Bxy", help='coefficient of x on y', default=0.3)
parser <- add_argument(parser, "--Bzy", help='coeffficient of z on y', default=-0.3) parser <- add_argument(parser, "--Bzy", help='coeffficient of z on y', default=-0.3)
parser <- add_argument(parser, "--Py", help='Base rate of y', default=0.5)
parser <- add_argument(parser, "--outcome_formula", help='formula for the outcome variable', default="y~x+z") parser <- add_argument(parser, "--outcome_formula", help='formula for the outcome variable', default="y~x+z")
parser <- add_argument(parser, "--proxy_formula", help='formula for the proxy variable', default="w_pred~y*x") parser <- add_argument(parser, "--proxy_formula", help='formula for the proxy variable', default="w_pred~y*x")

View File

@@ -0,0 +1,185 @@
### EXAMPLE 1: demonstrates how measurement error can lead to a type sign error in a covariate
### What kind of data invalidates fong + tyler?
### Even when you have a good predictor, if it's biased against a covariate you can get the wrong sign.
### Even when you include the proxy variable in the regression.
### But with some ground truth and multiple imputation, you can fix it.
library(argparser)
library(mecor)
library(ggplot2)
library(data.table)
library(filelock)
library(arrow)
library(Amelia)
library(Zelig)
library(predictionError)
options(amelia.parallel="no",
amelia.ncpus=1)
setDTthreads(40)
source("simulation_base.R")
## SETUP:
### we want to estimate x -> y; x is MAR
### we have x -> k; k -> w; x -> w is used to predict x via the model w.
### A realistic scenario is that we have an NLP model predicting something like "racial harassment" in social media comments
### The labels x are binary, but the model provides a continuous predictor
### simulation:
#### how much power do we get from the model in the first place? (sweeping N and m)
####
## one way to do it is by adding correlation to x.obs and y that isn't in w.
## in other words, the model is missing an important feature of x.obs that's related to y.
## Simulate data for the robustness check with a skewed, non-normal outcome
## (variant of example 2, following Fong & Tyler's mixture construction).
##
## Args:
##   N: total number of observations.
##   m: number of observations that receive ground-truth labels (x.obs).
##   B0: intercept of the outcome model.
##   Bxy, Bzx, Bzy: structural coefficients for x -> y, z -> x, and z -> y.
##   seed: RNG seed.
##   y_explained_variance: proportion of var(y) explained by the linear part.
##   prediction_accuracy: target accuracy of the simulated classifier w_pred.
##   y_bias, z_bias: coefficients of the outcome residual and of z on the
##     log-odds that a classification is correct.
##   Px: base rate of x, applied on the logit scale.
##   accuracy_imbalance_difference: unused by this implementation; kept for
##     interface compatibility with the sibling simulation scripts.
##   sd_y_mixin: sd of the Bernoulli(0.15)-gated normal noise mixed into y
##     to make its distribution skewed / non-normal.
##
## Returns a data.table with columns x, y, z, x.obs (NA when unlabeled),
## w (classifier score in (0,1)), and w_pred (w thresholded at 0.5).
simulate_data <- function(N, m, B0, Bxy, Bzx, Bzy, seed, y_explained_variance=0.025, prediction_accuracy=0.73, y_bias=-0.8, z_bias=0, Px=0.5, accuracy_imbalance_difference=0.3, sd_y_mixin=1){
    set.seed(seed)
    # make w and y dependent
    z <- rnorm(N, sd=0.5)
    x <- rbinom(N, 1, plogis(Bzx * z + qlogis(Px)))

    ## following Fong + Tyler: mix y with a Bernoulli(0.15) x |N (0, 20)| to make a skewed non-normal distribution
    ## scale the Gaussian noise so the linear predictor explains
    ## y_explained_variance of the (pre-mixin) variance of y
    y.var.epsilon <- (var(Bzy * z) + var(Bxy * x) + 2 * cov(Bzy * z, Bxy * x)) * ((1 - y_explained_variance) / y_explained_variance)
    y.epsilon <- rnorm(N, sd = sqrt(y.var.epsilon))
    ## FIX: include the intercept B0 in the outcome, consistent with the other
    ## simulation scripts. No behavioral change for the default B0 = 0.
    y <- B0 + Bzy * z + Bxy * x + y.epsilon + rbinom(N, 1, 0.15) * rnorm(N, 0, sd_y_mixin)

    df <- data.table(x=x, y=y, z=z)

    # only m randomly chosen rows get ground-truth labels
    if(m < N){
        df <- df[sample(nrow(df), m), x.obs := x]
    } else {
        df <- df[, x.obs := x]
    }

    ## probablity of an error is correlated with y
    ## pz <- mean(z)
    ## accuracy_imbalance_ratio <- (prediction_accuracy + accuracy_imbalance_difference/2) / (prediction_accuracy - accuracy_imbalance_difference/2)
    ## # this works because of conditional probability
    ## accuracy_z0 <- prediction_accuracy / (pz*(accuracy_imbalance_ratio) + (1-pz))
    ## accuracy_z1 <- accuracy_imbalance_ratio * accuracy_z0
    ## z0x0 <- df[(z==0) & (x==0)]$x
    ## z0x1 <- df[(z==0) & (x==1)]$x
    ## z1x0 <- df[(z==1) & (x==0)]$x
    ## z1x1 <- df[(z==1) & (x==1)]$x
    ## yz0x0 <- df[(z==0) & (x==0)]$y
    ## yz0x1 <- df[(z==0) & (x==1)]$y
    ## yz1x0 <- df[(z==1) & (x==0)]$y
    ## yz1x1 <- df[(z==1) & (x==1)]$y
    ## nz0x0 <- nrow(df[(z==0) & (x==0)])
    ## nz0x1 <- nrow(df[(z==0) & (x==1)])
    ## nz1x0 <- nrow(df[(z==1) & (x==0)])
    ## nz1x1 <- nrow(df[(z==1) & (x==1)])
    ## yz1 <- df[z==1]$y
    ## # tranform yz0.1 into a logistic distribution with mean accuracy_z0
    ## acc.z0x0 <- plogis(0.5*scale(yz0x0) + qlogis(accuracy_z0))
    ## acc.z0x1 <- plogis(0.5*scale(yz0x1) + qlogis(accuracy_z0))
    ## acc.z1x0 <- plogis(1.5*scale(yz1x0) + qlogis(accuracy_z1))
    ## acc.z1x1 <- plogis(1.5*scale(yz1x1) + qlogis(accuracy_z1))
    ## w0z0x0 <- (1-z0x0)**2 + (-1)**(1-z0x0) * acc.z0x0
    ## w0z0x1 <- (1-z0x1)**2 + (-1)**(1-z0x1) * acc.z0x1
    ## w0z1x0 <- (1-z1x0)**2 + (-1)**(1-z1x0) * acc.z1x0
    ## w0z1x1 <- (1-z1x1)**2 + (-1)**(1-z1x1) * acc.z1x1
    ## w0z0x0.noisy.odds <- rlogis(nz0x0,qlogis(w0z0x0))
    ## w0z0x1.noisy.odds <- rlogis(nz0x1,qlogis(w0z0x1))
    ## w0z1x0.noisy.odds <- rlogis(nz1x0,qlogis(w0z1x0))
    ## w0z1x1.noisy.odds <- rlogis(nz1x1,qlogis(w0z1x1))
    ## df[(z==0)&(x==0),w:=plogis(w0z0x0.noisy.odds)]
    ## df[(z==0)&(x==1),w:=plogis(w0z0x1.noisy.odds)]
    ## df[(z==1)&(x==0),w:=plogis(w0z1x0.noisy.odds)]
    ## df[(z==1)&(x==1),w:=plogis(w0z1x1.noisy.odds)]
    ## df[,w_pred:=as.integer(w > 0.5)]

    ## classification correctness depends on the outcome residual (y_bias)
    ## and on z (z_bias); qlogis(pnorm(..., log.p)) maps each to the logit
    ## scale in a numerically stable way.
    resids <- resid(lm(y ~ x + z))
    ## NOTE(review): resids are passed to pnorm with sd = 1 even though their
    ## actual sd may be much larger; confirm whether they should be scaled.
    ## FIX: pnorm's second positional argument is `mean`, not `sd`, so the
    ## original pnorm(z[...], sd(z), ...) shifted the mean by sd(z) instead of
    ## setting the spread; pass sd = sd(z) explicitly (mean stays 0).
    odds.x1 <- qlogis(prediction_accuracy) + y_bias * qlogis(pnorm(resids[x == 1], log.p=TRUE), log.p=TRUE) + z_bias * qlogis(pnorm(z[x == 1], mean=0, sd=sd(z), log.p=TRUE), log.p=TRUE)
    odds.x0 <- qlogis(prediction_accuracy, lower.tail=FALSE) + y_bias * qlogis(pnorm(resids[x == 0], log.p=TRUE), log.p=TRUE) + z_bias * qlogis(pnorm(z[x == 0], mean=0, sd=sd(z), log.p=TRUE), log.p=TRUE)

    # draw noisy scores on the log-odds scale, then map back to probabilities
    # (assignment order x==0 then x==1 preserved so RNG draws match)
    df[x==0, w := plogis(rlogis(.N, odds.x0))]
    df[x==1, w := plogis(rlogis(.N, odds.x1))]

    # debugging output: flag any rows whose score came out NA
    print(prediction_accuracy)
    print(resids[is.na(df$w)])
    print(odds.x0[is.na(df$w)])
    print(odds.x1[is.na(df$w)])

    df[, w_pred := as.integer(w > 0.5)]

    # report overall and y-conditional accuracy of the simulated classifier
    print(mean(df$w_pred == df$x))
    print(mean(df[y >= 0]$w_pred == df[y >= 0]$x))
    print(mean(df[y <= 0]$w_pred == df[y <= 0]$x))
    return(df)
}
## Command-line driver: parse simulation parameters, generate data, run the
## corrected-estimator simulation, and append the result row to a feather
## file, guarded by a file lock so concurrent cluster jobs don't clobber it.
parser <- arg_parser("Simulate data and fit corrected models")
parser <- add_argument(parser, "--N", default=5000, help="number of observations of w")
parser <- add_argument(parser, "--m", default=500, help="m the number of ground truth observations")
parser <- add_argument(parser, "--seed", default=51, help='seed for the rng')
parser <- add_argument(parser, "--outfile", help='output file', default='example_2.feather')
parser <- add_argument(parser, "--y_explained_variance", help='what proportion of the variance of y can be explained?', default=0.1)
parser <- add_argument(parser, "--prediction_accuracy", help='how accurate is the predictive model?', default=0.75)
parser <- add_argument(parser, "--accuracy_imbalance_difference", help='how much more accurate is the predictive model for one class than the other?', default=0.3)
parser <- add_argument(parser, "--Bzx", help='Effect of z on x', default=0.3)
parser <- add_argument(parser, "--Bzy", help='Effect of z on y', default=-0.3)
## FIX: help text said 'Effect of z on y' (copy-paste error); Bxy is x -> y.
parser <- add_argument(parser, "--Bxy", help='Effect of x on y', default=0.3)
parser <- add_argument(parser, "--outcome_formula", help='formula for the outcome variable', default="y~x+z")
parser <- add_argument(parser, "--proxy_formula", help='formula for the proxy variable', default="w_pred~y*z*x")
parser <- add_argument(parser, "--y_bias", help='coefficient of y on the probability a classification is correct', default=-0.5)
parser <- add_argument(parser, "--z_bias", help='coefficient of z on the probability a classification is correct', default=0)
parser <- add_argument(parser, "--truth_formula", help='formula for the true variable', default="x~z")
parser <- add_argument(parser, "--Px", help='base rate of x', default=0.5)
parser <- add_argument(parser, "--confint_method", help='method for approximating confidence intervals', default='quad')
## FIX: typo 'varience' -> 'variance' in the help text.
parser <- add_argument(parser, "--sd_y_mixin", help='variance of the non-normal part of Y', default=10)

args <- parse_args(parser)

B0 <- 0
Px <- args$Px
Bxy <- args$Bxy
Bzy <- args$Bzy
Bzx <- args$Bzx

# only run when some observations are unlabeled (m < N)
if(args$m < args$N){
    ## FIX: forward Px and z_bias to simulate_data; they were parsed (and
    ## recorded in the results) but never passed, so non-default values
    ## silently had no effect. Both defaults match simulate_data's defaults,
    ## so default runs are unchanged.
    df <- simulate_data(args$N, args$m, B0, Bxy, Bzx, Bzy, args$seed, args$y_explained_variance, args$prediction_accuracy, y_bias=args$y_bias, z_bias=args$z_bias, Px=Px, sd_y_mixin=args$sd_y_mixin)

    ## df.pc <- df[,.(x,y,z,w_pred,w)]
    ## # df.pc <- df.pc[,err:=x-w_pred]
    ## pc.df <- pc(suffStat=list(C=cor(df.pc),n=nrow(df.pc)),indepTest=gaussCItest,labels=names(df.pc),alpha=0.05)
    ## plot(pc.df)

    # record every parameter used so the output file is self-describing;
    # z_bias added here since it now affects the simulation
    result <- list('N'=args$N, 'm'=args$m, 'B0'=B0, 'Bxy'=Bxy, 'Bzx'=args$Bzx, 'Bzy'=Bzy, 'Px'=Px, 'seed'=args$seed, 'y_explained_variance'=args$y_explained_variance, 'prediction_accuracy'=args$prediction_accuracy, 'accuracy_imbalance_difference'=args$accuracy_imbalance_difference, 'y_bias'=args$y_bias, 'z_bias'=args$z_bias, 'outcome_formula'=args$outcome_formula, 'proxy_formula'=args$proxy_formula, truth_formula=args$truth_formula, confint_method=args$confint_method, error='', 'sd_y_mixin'=args$sd_y_mixin)

    outline <- run_simulation(df, result, outcome_formula=as.formula(args$outcome_formula), proxy_formula=as.formula(args$proxy_formula), truth_formula=as.formula(args$truth_formula), confint_method=args$confint_method)

    # append under an exclusive lock; fill=TRUE tolerates new columns
    outfile_lock <- lock(paste0(args$outfile, '_lock'), exclusive=TRUE)
    if(file.exists(args$outfile)){
        logdata <- read_feather(args$outfile)
        logdata <- rbind(logdata, as.data.table(outline), fill=TRUE)
    } else {
        logdata <- as.data.table(outline)
    }
    print(outline)
    write_feather(logdata, args$outfile)
    unlock(outfile_lock)
}

View File

@@ -31,12 +31,12 @@ source("simulation_base.R")
## one way to do it is by adding correlation to x.obs and y that isn't in w. ## one way to do it is by adding correlation to x.obs and y that isn't in w.
## in other words, the model is missing an important feature of x.obs that's related to y. ## in other words, the model is missing an important feature of x.obs that's related to y.
simulate_data <- function(N, m, B0, Bxy, Bzy, seed, prediction_accuracy=0.73, z_bias=-0.75){ simulate_data <- function(N, m, B0, Bxy, Bzy, Bxz=0, seed=0, prediction_accuracy=0.73, z_bias=-0.75){
set.seed(seed) set.seed(seed)
# make w and y dependent # make w and y dependent
z <- rnorm(N,sd=0.5) z <- rnorm(N,sd=0.5)
x <- rbinom(N,1,0.5) x <- rbinom(N,1,plogis(Bxz*z))
ystar <- Bzy * z + Bxy * x + B0 ystar <- Bzy * z + Bxy * x + B0
y <- rbinom(N,1,plogis(ystar)) y <- rbinom(N,1,plogis(ystar))
@@ -70,29 +70,32 @@ parser <- add_argument(parser, "--N", default=1000, help="number of observations
parser <- add_argument(parser, "--m", default=500, help="m the number of ground truth observations") parser <- add_argument(parser, "--m", default=500, help="m the number of ground truth observations")
parser <- add_argument(parser, "--seed", default=17, help='seed for the rng') parser <- add_argument(parser, "--seed", default=17, help='seed for the rng')
parser <- add_argument(parser, "--outfile", help='output file', default='example_4.feather') parser <- add_argument(parser, "--outfile", help='output file', default='example_4.feather')
parser <- add_argument(parser, "--prediction_accuracy", help='how accurate is the predictive model?', default=0.79) parser <- add_argument(parser, "--prediction_accuracy", help='how accurate is the predictive model?', default=0.75)
## parser <- add_argument(parser, "--z_bias_y1", help='how is the classifier biased when y = 1?', default=-0.75) ## parser <- add_argument(parser, "--z_bias_y1", help='how is the classifier biased when y = 1?', default=-0.75)
## parser <- add_argument(parser, "--z_bias_y0", help='how is the classifier biased when y = 0 ?', default=0.75) ## parser <- add_argument(parser, "--z_bias_y0", help='how is the classifier biased when y = 0 ?', default=0.75)
parser <- add_argument(parser, "--z_bias", help='how is the classifier biased?', default=1.5) parser <- add_argument(parser, "--z_bias", help='how is the classifier biased?', default=-0.5)
parser <- add_argument(parser, "--Bxy", help='coefficient of x on y', default=0.1) parser <- add_argument(parser, "--Bxy", help='coefficient of x on y', default=0.7)
parser <- add_argument(parser, "--Bzy", help='coeffficient of z on y', default=-0.1) parser <- add_argument(parser, "--Bzy", help='coeffficient of z on y', default=-0.7)
parser <- add_argument(parser, "--Bzx", help='coeffficient of z on y', default=1)
parser <- add_argument(parser, "--B0", help='coeffficient of z on y', default=0)
parser <- add_argument(parser, "--outcome_formula", help='formula for the outcome variable', default="y~x+z") parser <- add_argument(parser, "--outcome_formula", help='formula for the outcome variable', default="y~x+z")
parser <- add_argument(parser, "--proxy_formula", help='formula for the proxy variable', default="w_pred~y+z") parser <- add_argument(parser, "--proxy_formula", help='formula for the proxy variable', default="w_pred~y+z")
parser <- add_argument(parser, "--confint_method", help='method for approximating confidence intervals', default='quad')
args <- parse_args(parser) args <- parse_args(parser)
B0 <- 0 B0 <- args$B0
Bxy <- args$Bxy Bxy <- args$Bxy
Bzy <- args$Bzy Bzy <- args$Bzy
Bzx <- args$Bzx
if(args$m < args$N){ if(args$m < args$N){
df <- simulate_data(args$N, args$m, B0, Bxy, Bzy, args$seed, args$prediction_accuracy, args$z_bias) df <- simulate_data(args$N, args$m, B0, Bxy, Bzx, Bzy, args$seed, args$prediction_accuracy, args$z_bias)
# result <- list('N'=args$N,'m'=args$m,'B0'=B0,'Bxy'=Bxy,'Bzy'=Bzy, 'seed'=args$seed, 'y_explained_variance'=args$y_explained_variance, 'prediction_accuracy'=args$prediction_accuracy, 'z_bias_y0'=args$z_bias_y0,'z_bias_y1'=args$z_bias_y1,'outcome_formula' = args$outcome_formula, 'proxy_formula' = args$proxy_formula) # result <- list('N'=args$N,'m'=args$m,'B0'=B0,'Bxy'=Bxy,'Bzy'=Bzy,'Bzx'=Bzx, 'seed'=args$seed, 'y_explained_variance'=args$y_explained_variance, 'prediction_accuracy'=args$prediction_accuracy, 'z_bias_y0'=args$z_bias_y0,'z_bias_y1'=args$z_bias_y1,'outcome_formula' = args$outcome_formula, 'proxy_formula' = args$proxy_formula)
result <- list('N'=args$N,'m'=args$m,'B0'=B0,'Bxy'=Bxy,'Bzy'=Bzy, 'seed'=args$seed, 'y_explained_variance'=args$y_explained_variance, 'prediction_accuracy'=args$prediction_accuracy, 'z_bias'=args$z_bias,'outcome_formula' = args$outcome_formula, 'proxy_formula' = args$proxy_formula) result <- list('N'=args$N,'m'=args$m,'B0'=B0,'Bxy'=Bxy,'Bzy'=Bzy,'Bzx'=Bzx, 'seed'=args$seed, 'y_explained_variance'=args$y_explained_variance, 'prediction_accuracy'=args$prediction_accuracy, 'z_bias'=args$z_bias,'outcome_formula' = args$outcome_formula, 'proxy_formula' = args$proxy_formula, confint_method=args$confint_method)
outline <- run_simulation_depvar(df, result, outcome_formula = as.formula(args$outcome_formula), proxy_formula = as.formula(args$proxy_formula)) outline <- run_simulation_depvar(df, result, outcome_formula = as.formula(args$outcome_formula), proxy_formula = as.formula(args$proxy_formula),confint_method=args$confint_method)
print(outline$error.cor.z)
outfile_lock <- lock(paste0(args$outfile, '_lock'),exclusive=TRUE) outfile_lock <- lock(paste0(args$outfile, '_lock'),exclusive=TRUE)

View File

@@ -1,4 +1,4 @@
.ONESHELL:
SHELL=bash SHELL=bash
Ns=[1000, 5000, 10000] Ns=[1000, 5000, 10000]
@@ -6,8 +6,9 @@ ms=[100, 200, 400]
seeds=[$(shell seq -s, 1 500)] seeds=[$(shell seq -s, 1 500)]
explained_variances=[0.1] explained_variances=[0.1]
all:remembr.RDS remember_irr.RDS all:main supplement
supplement: remember_robustness_misspec.RDS main:remembr.RDS
supplement:robustness_1.RDS robustness_1_dv.RDS robustness_2.RDS robustness_2_dv.RDS robustness_3.RDS robustness_3_dv.RDS robustness_3_proflik.RDS robustness_3_dv_proflik.RDS robustness_4.RDS robustness_4_dv.RDS robustness_5.RDS robustness_5_dv.RDS robustness_6.feather
srun=sbatch --wait --verbose run_job.sbatch srun=sbatch --wait --verbose run_job.sbatch
@@ -15,7 +16,7 @@ srun=sbatch --wait --verbose run_job.sbatch
joblists:example_1_jobs example_2_jobs example_3_jobs joblists:example_1_jobs example_2_jobs example_3_jobs
# test_true_z_jobs: test_true_z.R simulation_base.R # test_true_z_jobs: test_true_z.R simulation_base.R
# sbatch --wait --verbose run_job.sbatch grid_sweep.py --command "Rscript test_true_z.R" --arg_dict '{"N":${Ns},"m":${ms}, "seed":${seeds}, "outfile":["test_true_z.feather"], "y_explained_variancevari":${explained_variances}, "Bzx":${Bzx}}' --outfile test_true_z_jobsb # sbatch --wait --verbose run_job.sbatch ./grid_sweep.py --command "Rscript test_true_z.R" --arg_dict '{"N":${Ns},"m":${ms}, "seed":${seeds}, "outfile":["test_true_z.feather"], "y_explained_variancevari":${explained_variances}, "Bzx":${Bzx}}' --outfile test_true_z_jobsb
# test_true_z.feather: test_true_z_jobs # test_true_z.feather: test_true_z_jobs
# rm -f test_true_z.feather # rm -f test_true_z.feather
@@ -24,57 +25,54 @@ joblists:example_1_jobs example_2_jobs example_3_jobs
example_1_jobs: 01_two_covariates.R simulation_base.R grid_sweep.py pl_methods.R example_1_jobs: 01_two_covariates.R simulation_base.R grid_sweep.py pl_methods.R
sbatch --wait --verbose run_job.sbatch grid_sweep.py --command "Rscript 01_two_covariates.R" --arg_dict '{"N":${Ns},"m":${ms}, "seed":${seeds}, "outfile":["example_1.feather"], "y_explained_variance":${explained_variances}, "Bzx":[1]}' --outfile example_1_jobs ${srun} ./grid_sweep.py --command "Rscript 01_two_covariates.R" --arg_dict '{"N":${Ns},"m":${ms}, "seed":${seeds}, "outfile":["example_1.feather"], "y_explained_variance":${explained_variances}, "Bzx":[1]}' --outfile example_1_jobs
example_1.feather: example_1_jobs example_1.feather: example_1_jobs
rm -f example_1.feather rm -f example_1.feather
sbatch --wait --verbose --array=1-1000 run_simulation.sbatch 0 example_1_jobs sbatch --wait --verbose --array=1-1000 run_simulation.sbatch 0 example_1_jobs
sbatch --wait --verbose --array=1001-2000 run_simulation.sbatch 0 example_1_jobs sbatch --wait --verbose --array=1001-2000 run_simulation.sbatch 0 example_1_jobs
sbatch --wait --verbose --array=2001-3000 run_simulation.sbatch 0 example_1_jobs sbatch --wait --verbose --array=2001-3000 run_simulation.sbatch 0 example_1_jobs
sbatch --wait --verbose --array=3001-4000 run_simulation.sbatch 0 example_1_jobs sbatch --wait --verbose --array=3001-$(shell cat example_1_jobs | wc -l) run_simulation.sbatch 0 example_1_jobs
sbatch --wait --verbose --array=4001-$(shell cat example_1_jobs | wc -l) run_simulation.sbatch 0 example_1_jobs
example_2_jobs: 02_indep_differential.R simulation_base.R grid_sweep.py pl_methods.R example_2_jobs: 02_indep_differential.R simulation_base.R grid_sweep.py pl_methods.R
sbatch --wait --verbose run_job.sbatch grid_sweep.py --command "Rscript 02_indep_differential.R" --arg_dict '{"N":${Ns},"m":${ms}, "seed":${seeds}, "outfile":["example_2.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[1], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y*z*x"]}' --outfile example_2_jobs sbatch --wait --verbose run_job.sbatch ./grid_sweep.py --command "Rscript 02_indep_differential.R" --arg_dict '{"N":${Ns},"m":${ms}, "seed":${seeds}, "outfile":["example_2.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[1], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y+z+x"]}' --outfile example_2_jobs
example_2.feather: example_2_jobs example_2.feather: example_2_jobs
rm -f example_2.feather rm -f example_2.feather
sbatch --wait --verbose --array=1-1000 run_simulation.sbatch 0 example_2_jobs sbatch --wait --verbose --array=1-1000 run_simulation.sbatch 0 example_2_jobs
sbatch --wait --verbose --array=1001-2000 run_simulation.sbatch 0 example_2_jobs sbatch --wait --verbose --array=1001-2000 run_simulation.sbatch 0 example_2_jobs
sbatch --wait --verbose --array=2001-3000 run_simulation.sbatch 0 example_2_jobs sbatch --wait --verbose --array=2001-3000 run_simulation.sbatch 0 example_2_jobs
sbatch --wait --verbose --array=3001-4000 run_simulation.sbatch 0 example_2_jobs sbatch --wait --verbose --array=3001-$(shell cat example_2_jobs | wc -l) run_simulation.sbatch 0 example_2_jobs
sbatch --wait --verbose --array=4001-$(shell cat example_2_jobs | wc -l) run_simulation.sbatch 0 example_2_jobs
# example_2_B_jobs: example_2_B.R # example_2_B_jobs: example_2_B.R
# sbatch --wait --verbose run_job.sbatch grid_sweep.py --command "Rscript example_2_B.R" --arg_dict '{"N":${Ns},"m":${ms}, "seed":${seeds}, "outfile":["example_2_B.feather"]}' --outfile example_2_B_jobs # sbatch --wait --verbose run_job.sbatch python3 ./grid_sweep.py --command "Rscript example_2_B.R" --arg_dict '{"N":${Ns},"m":${ms}, "seed":${seeds}, "outfile":["example_2_B.feather"]}' --outfile example_2_B_jobs
# example_2_B.feather: example_2_B_jobs # example_2_B.feather: example_2_B_jobs
# rm -f example_2_B.feather # rm -f example_2_B.feather
# sbatch --wait --verbose --array=1-3000 run_simulation.sbatch 0 example_2_B_jobs # sbatch --wait --verbose --array=1-3000 run_simulation.sbatch 0 example_2_B_jobs
example_3_jobs: 03_depvar.R simulation_base.R grid_sweep.py pl_methods.R example_3_jobs: 03_depvar.R simulation_base.R grid_sweep.py pl_methods.R
sbatch --wait --verbose run_job.sbatch grid_sweep.py --command "Rscript 03_depvar.R" --arg_dict '{"N":${Ns},"m":${ms}, "Bxy":[0.7],"Bzy":[-0.7],"seed":${seeds}, "outfile":["example_3.feather"], "y_explained_variance":${explained_variances}}' --outfile example_3_jobs sbatch --wait --verbose run_job.sbatch ./grid_sweep.py --command "Rscript 03_depvar.R" --arg_dict '{"N":${Ns},"m":${ms}, "Bxy":[0.7],"Bzy":[-0.7],"Bzx":[1],"seed":${seeds}, "outfile":["example_3.feather"], "y_explained_variance":${explained_variances}}' --outfile example_3_jobs
example_3.feather: example_3_jobs example_3.feather: example_3_jobs
rm -f example_3.feather rm -f example_3.feather
sbatch --wait --verbose --array=1-1000 run_simulation.sbatch 0 example_3_jobs sbatch --wait --verbose --array=1-1000 run_simulation.sbatch 0 example_3_jobs
sbatch --wait --verbose --array=1001-2000 run_simulation.sbatch 0 example_3_jobs sbatch --wait --verbose --array=1001-2000 run_simulation.sbatch 0 example_3_jobs
sbatch --wait --verbose --array=2001-3000 run_simulation.sbatch 0 example_3_jobs sbatch --wait --verbose --array=2001-3000 run_simulation.sbatch 0 example_3_jobs
sbatch --wait --verbose --array=3001-4000 run_simulation.sbatch 0 example_3_jobs sbatch --wait --verbose --array=3001-$(shell cat example_3_jobs | wc -l) run_simulation.sbatch 0 example_3_jobs
sbatch --wait --verbose --array=4001-$(shell cat example_3_jobs | wc -l) run_simulation.sbatch 0 example_3_jobs
example_4_jobs: 04_depvar_differential.R simulation_base.R grid_sweep.py pl_methods.R example_4_jobs: 04_depvar_differential.R simulation_base.R grid_sweep.py pl_methods.R
sbatch --wait --verbose run_job.sbatch grid_sweep.py --command "Rscript 04_depvar_differential.R" --arg_dict '{"N":${Ns},"Bxy":[0.7],"Bzy":[-0.7],"m":${ms}, "seed":${seeds}, "outfile":["example_4.feather"], "z_bias":[0.5]}' --outfile example_4_jobs sbatch --wait --verbose run_job.sbatch ./grid_sweep.py --command "Rscript 04_depvar_differential.R" --arg_dict '{"N":${Ns},"Bxy":[0.7],"Bzy":[-0.7],"Bzx":[1], "m":${ms}, "seed":${seeds}, "outfile":["example_4.feather"], "z_bias":[-0.5], "prediction_accuracy":[0.73]}' --outfile example_4_jobs
example_4.feather: example_4_jobs example_4.feather: example_4_jobs
rm -f example_4.feather rm -f example_4.feather
sbatch --wait --verbose --array=1-1000 run_simulation.sbatch 0 example_4_jobs sbatch --wait --verbose --array=1-1000 run_simulation.sbatch 0 example_4_jobs
sbatch --wait --verbose --array=1001-2000 run_simulation.sbatch 0 example_4_jobs sbatch --wait --verbose --array=1001-2000 run_simulation.sbatch 0 example_4_jobs
sbatch --wait --verbose --array=2001-3001 run_simulation.sbatch 0 example_4_jobs
sbatch --wait --verbose --array=2001-3000 run_simulation.sbatch 0 example_4_jobs sbatch --wait --verbose --array=2001-3000 run_simulation.sbatch 0 example_4_jobs
sbatch --wait --verbose --array=3001-4000 run_simulation.sbatch 0 example_4_jobs sbatch --wait --verbose --array=3001-$(shell cat example_4_jobs | wc -l) run_simulation.sbatch 0 example_4_jobs
sbatch --wait --verbose --array=4001-$(shell cat example_4_jobs | wc -l) run_simulation.sbatch 0 example_4_jobs
remembr.RDS:example_1.feather example_2.feather example_3.feather example_4.feather plot_example.R plot_dv_example.R summarize_estimator.R remembr.RDS:example_1.feather example_2.feather example_3.feather example_4.feather plot_example.R plot_dv_example.R summarize_estimator.R
@@ -85,70 +83,397 @@ remembr.RDS:example_1.feather example_2.feather example_3.feather example_4.feat
${srun} Rscript plot_dv_example.R --infile example_4.feather --name "plot.df.example.4" ${srun} Rscript plot_dv_example.R --infile example_4.feather --name "plot.df.example.4"
irr_Ns = [1000] START=0
irr_ms = [150,300,600] STEP=1000
irr_seeds=${seeds} ONE=1
irr_explained_variances=${explained_variances}
irr_coder_accuracy=[0.80]
example_5_jobs: 05_irr_indep.R irr_simulation_base.R grid_sweep.py pl_methods.R measerr_methods.R robustness_Ns=[1000,5000]
sbatch --wait --verbose run_job.sbatch grid_sweep.py --command "Rscript 05_irr_indep.R" --arg_dict '{"N":${irr_Ns},"m":${irr_ms}, "seed":${irr_seeds}, "outfile":["example_5.feather"], "y_explained_variance":${irr_explained_variances}, "coder_accuracy":${irr_coder_accuracy}}' --outfile example_5_jobs robustness_ms=[100,200]
example_5.feather:example_5_jobs #in robustness 1 / example 2 misclassification is correlated with Y.
rm -f example_5.feather robustness_1_jobs_p1: 02_indep_differential.R simulation_base.R grid_sweep.py
sbatch --wait --verbose --array=1-1000 run_simulation.sbatch 0 example_5_jobs sbatch --wait --verbose run_job.sbatch ./grid_sweep.py --command "Rscript 02_indep_differential.R" --arg_dict '{"N":[1000],"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_1.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3,0],"Bxy":[0.3],"Bzx":[1,0], "outcome_formula":["y~x+z"], "z_bias":[0, 0.5], "proxy_formula":["w_pred~y+x"], "truth_formula":["x~1"]}' --outfile robustness_1_jobs_p1
sbatch --wait --verbose --array=1001-$(shell cat example_5_jobs | wc -l) run_simulation.sbatch 1000 example_5_jobs
# sbatch --wait --verbose --array=2001-3000 run_simulation.sbatch 2000 example_5_jobs robustness_1_jobs_p2: 02_indep_differential.R simulation_base.R grid_sweep.py
# sbatch --wait --verbose --array=3001-4000 run_simulation.sbatch 3000 example_5_jobs sbatch --wait --verbose run_job.sbatch ./grid_sweep.py --command "Rscript 02_indep_differential.R" --arg_dict '{"N":[5000],"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_1.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3,0],"Bxy":[0.3],"Bzx":[1,0], "outcome_formula":["y~x+z"], "z_bias":[0, 0.5], "proxy_formula":["w_pred~y+x"], "truth_formula":["x~1"]}' --outfile robustness_1_jobs_p2
# sbatch --wait --verbose --array=2001-$(shell cat example_5_jobs | wc -l) run_simulation.sbatch 4000 example_5_jobs
robustness_1.feather: robustness_1_jobs_p1 robustness_1_jobs_p2
rm -f $@
$(eval END_1!=cat robustness_1_jobs_p1 | wc -l)
$(eval ITEMS_1!=seq $(START) $(STEP) $(END_1))
$(eval END_2!=cat robustness_1_jobs_p2 | wc -l)
$(eval ITEMS_2!=seq $(START) $(STEP) $(END_2))
$(foreach item,$(ITEMS_1),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_1_jobs_p1;)
$(foreach item,$(ITEMS_2),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_1_jobs_p2;)
robustness_1.RDS: robustness_1.feather summarize_estimator.R
rm -f robustness_1.RDS
${srun} Rscript plot_example.R --infile $< --name "robustness_1" --remember-file $@
# when Bzy is 0 and zbias is not zero, we have the case where P(W|Y,X,Z) has an omitted variable that is conditionanlly independent from Y. Note that X and Z are independent in this scenario.
robustness_1_dv_jobs_p1: simulation_base.R 04_depvar_differential.R grid_sweep.py
${srun} ./grid_sweep.py --command 'Rscript 04_depvar_differential.R' --arg_dict '{"N":[1000],"Bzx":[1], "Bxy":[0.7,0],"Bzy":[-0.7,0],"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_1_dv.feather"], "proxy_formula":["w_pred~y"],"z_bias":[-0.5]}' --outfile robustness_1_dv_jobs_p1
robustness_1_dv_jobs_p2: simulation_base.R 04_depvar_differential.R grid_sweep.py
${srun} ./grid_sweep.py --command 'Rscript 04_depvar_differential.R' --arg_dict '{"N":[5000],"Bzx":[1], "Bxy":[0.7,0],"Bzy":[-0.7,0],"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_1_dv.feather"], "proxy_formula":["w_pred~y"],"z_bias":[-0.5]}' --outfile robustness_1_dv_jobs_p2
robustness_1_dv.feather: robustness_1_dv_jobs_p1 robustness_1_dv_jobs_p2
rm -f $@
$(eval END_1!=cat robustness_1_dv_jobs_p1 | wc -l)
$(eval ITEMS_1!=seq $(START) $(STEP) $(END_1))
$(eval END_2!=cat robustness_1_dv_jobs_p2 | wc -l)
$(eval ITEMS_2!=seq $(START) $(STEP) $(END_1))
$(foreach item,$(ITEMS_1),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_1_dv_jobs_p1;)
$(foreach item,$(ITEMS_2),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_1_dv_jobs_p2;)
robustness_1_dv.RDS: robustness_1_dv.feather summarize_estimator.R
rm -f $@
${srun} Rscript plot_dv_example.R --infile $< --name "robustness_1_dv" --remember-file $@
robustness_2_jobs_p1: grid_sweep.py 01_two_covariates.R simulation_base.R
rm -f $@
${srun} ./$< --command 'Rscript 01_two_covariates.R' --arg_dict '{"N":${robustness_Ns},"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_2.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[1], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y+x"], "truth_formula":["x~z"], "prediction_accuracy":[0.60,0.65]}' --outfile $@
robustness_2_jobs_p2: grid_sweep.py 01_two_covariates.R simulation_base.R
rm -f $@
${srun} ./$< --command 'Rscript 01_two_covariates.R' --arg_dict '{"N":${robustness_Ns},"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_2.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[1], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y+x"], "truth_formula":["x~z"], "prediction_accuracy":[0.70,0.75]}' --outfile $@
robustness_2_jobs_p3: grid_sweep.py 01_two_covariates.R simulation_base.R
rm -f $@
${srun} ./$< --command 'Rscript 01_two_covariates.R' --arg_dict '{"N":${robustness_Ns},"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_2.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[1], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y+x"], "truth_formula":["x~z"], "prediction_accuracy":[0.80,0.85]}' --outfile $@
robustness_2_jobs_p4: grid_sweep.py 01_two_covariates.R simulation_base.R
rm -f $@
${srun} ./$< --command 'Rscript 01_two_covariates.R' --arg_dict '{"N":${robustness_Ns},"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_2.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[1], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y+x"], "truth_formula":["x~z"], "prediction_accuracy":[0.90,0.95]}' --outfile $@
robustness_2.feather: robustness_2_jobs_p1 robustness_2_jobs_p2 robustness_2_jobs_p3 robustness_2_jobs_p4
rm $@
$(eval END_1!=cat robustness_2_jobs_p1 | wc -l)
$(eval ITEMS_1!=seq $(START) $(STEP) $(END_1))
$(eval END_2!=cat robustness_2_jobs_p2 | wc -l)
$(eval ITEMS_2!=seq $(START) $(STEP) $(END_2))
$(eval END_3!=cat robustness_2_jobs_p3 | wc -l)
$(eval ITEMS_3!=seq $(START) $(STEP) $(END_3))
$(eval END_4!=cat robustness_2_jobs_p4 | wc -l)
$(eval ITEMS_4!=seq $(START) $(STEP) $(END_4))
$(foreach item,$(ITEMS_1),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_2_jobs_p1;)
$(foreach item,$(ITEMS_2),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_2_jobs_p2;)
$(foreach item,$(ITEMS_3),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_2_jobs_p3;)
$(foreach item,$(ITEMS_4),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_2_jobs_p4;)
robustness_2.RDS: plot_example.R robustness_2.feather summarize_estimator.R
rm -f $@
${srun} Rscript $< --infile $(word 2, $^) --name "robustness_2" --remember-file $@
robustness_2_dv_jobs_p1: grid_sweep.py 03_depvar.R simulation_base.R
rm -f $@
${srun} ./$< --command 'Rscript 03_depvar.R' --arg_dict '{"N":${robustness_Ns},"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_2_dv.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[1], "outcome_formula":["y~x+z"], "prediction_accuracy":[0.60,0.65]}' --outfile $@
robustness_2_dv_jobs_p2: grid_sweep.py 03_depvar.R simulation_base.R
rm -f $@
${srun} ./$< --command 'Rscript 03_depvar.R' --arg_dict '{"N":${robustness_Ns},"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_2_dv.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[1], "outcome_formula":["y~x+z"], "prediction_accuracy":[0.70,0.75]}' --outfile $@
robustness_2_dv_jobs_p3: grid_sweep.py 03_depvar.R simulation_base.R
rm -f $@
${srun} ./$< --command 'Rscript 03_depvar.R' --arg_dict '{"N":${robustness_Ns},"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_2_dv.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[1], "outcome_formula":["y~x+z"], "prediction_accuracy":[0.80,0.85]}' --outfile $@
robustness_2_dv_jobs_p4: grid_sweep.py 03_depvar.R simulation_base.R
rm -f $@
${srun} ./$< --command 'Rscript 03_depvar.R' --arg_dict '{"N":${robustness_Ns},"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_2_dv.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[1], "outcome_formula":["y~x+z"], "prediction_accuracy":[0.90,0.95]}' --outfile $@
robustness_2_dv.feather: robustness_2_dv_jobs_p1 robustness_2_dv_jobs_p2 robustness_2_dv_jobs_p3 robustness_2_dv_jobs_p4
rm -f $@
$(eval END_1!=cat robustness_2_dv_jobs_p1 | wc -l)
$(eval ITEMS_1!=seq $(START) $(STEP) $(END_1))
$(eval END_2!=cat robustness_2_dv_jobs_p2 | wc -l)
$(eval ITEMS_2!=seq $(START) $(STEP) $(END_2))
$(eval END_3!=cat robustness_2_dv_jobs_p3 | wc -l)
$(eval ITEMS_3!=seq $(START) $(STEP) $(END_3))
$(eval END_4!=cat robustness_2_dv_jobs_p4 | wc -l)
$(eval ITEMS_4!=seq $(START) $(STEP) $(END_4))
$(foreach item,$(ITEMS_1),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_2_dv_jobs_p1;)
$(foreach item,$(ITEMS_2),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_2_dv_jobs_p2;)
$(foreach item,$(ITEMS_3),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_2_dv_jobs_p3;)
$(foreach item,$(ITEMS_4),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_2_dv_jobs_p4;)
robustness_2_dv.RDS: plot_dv_example.R robustness_2_dv.feather summarize_estimator.R
rm -f $@
${srun} Rscript $< --infile $(word 2, $^) --name "robustness_2_dv" --remember-file $@
robustness_3_proflik_jobs: grid_sweep.py 01_two_covariates.R simulation_base.R
rm -f $@
${srun} ./$< --command 'Rscript 01_two_covariates.R' --arg_dict '{"N":[1000],"m":[100], "seed":${seeds}, "outfile":["robustness_3_proflik.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[1],"Px":[0.5,0.6,0.7,0.8,0.9,0.95], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y+x"], "truth_formula":["x~z"], "prediction_accuracy":[0.85], "confint_method":['spline']}' --outfile $@
robustness_3_proflik.feather: robustness_3_proflik_jobs
rm -f $@
$(eval END_1!=cat robustness_3_proflik_jobs | wc -l)
$(eval ITEMS_1!=seq $(START) $(STEP) $(END_1))
$(foreach item,$(ITEMS_1),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_3_proflik_jobs;)
robustness_3_proflik.RDS: plot_example.R robustness_3_proflik.feather summarize_estimator.R
rm -f $@
${srun} Rscript $< --infile $(word 2, $^) --name "robustness_3_proflik" --remember-file $@
robustness_3_jobs_p1: grid_sweep.py 01_two_covariates.R simulation_base.R
rm -f $@
${srun} ./$< --command 'Rscript 01_two_covariates.R' --arg_dict '{"N":${robustness_Ns},"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_3.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[1],"Px":[0.5,0.6], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y+x"], "truth_formula":["x~z"], "prediction_accuracy":[0.85]}' --outfile $@
robustness_3_jobs_p2: grid_sweep.py 01_two_covariates.R simulation_base.R
rm -f $@
${srun} ./$< --command 'Rscript 01_two_covariates.R' --arg_dict '{"N":${robustness_Ns},"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_3.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[1],"Px":[0.7,0.8], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y+x"], "truth_formula":["x~z"], "prediction_accuracy":[0.85]}' --outfile $@
robustness_3_jobs_p3: grid_sweep.py 01_two_covariates.R simulation_base.R
rm -f $@
${srun} ./$< --command 'Rscript 01_two_covariates.R' --arg_dict '{"N":${robustness_Ns},"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_3.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[1],"Px":[0.9,0.95], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y+x"], "truth_formula":["x~z"], "prediction_accuracy":[0.85]}' --outfile $@
robustness_3.feather: robustness_3_jobs_p1 robustness_3_jobs_p2 robustness_3_jobs_p3
rm -f $@
$(eval END_1!=cat robustness_3_jobs_p1 | wc -l)
$(eval ITEMS_1!=seq $(START) $(STEP) $(END_1))
$(eval END_2!=cat robustness_3_jobs_p2 | wc -l)
$(eval ITEMS_2!=seq $(START) $(STEP) $(END_2))
$(eval END_3!=cat robustness_3_jobs_p3 | wc -l)
$(eval ITEMS_3!=seq $(START) $(STEP) $(END_3))
$(foreach item,$(ITEMS_1),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_3_jobs_p1;)
$(foreach item,$(ITEMS_2),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_3_jobs_p2;)
$(foreach item,$(ITEMS_3),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_3_jobs_p3;)
robustness_3.RDS: plot_example.R robustness_3.feather summarize_estimator.R
rm -f $@
${srun} Rscript $< --infile $(word 2, $^) --name "robustness_3" --remember-file $@
robustness_3_dv_proflik_jobs: grid_sweep.py 03_depvar.R simulation_base.R
rm -f $@
${srun} ./$< --command 'Rscript 03_depvar.R' --arg_dict '{"N":[1000],"m":[100], "seed":${seeds}, "outfile":["robustness_3_dv_proflik.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[1],"B0":[0,0.405,0.846,1.386,2.197,2.944], "outcome_formula":["y~x+z"], "prediction_accuracy":[0.85],"confint_method":['spline']}' --outfile $@
robustness_3_dv_proflik.feather: robustness_3_dv_proflik_jobs
rm -f $@
$(eval END_1!=cat robustness_3_dv_proflik_jobs | wc -l)
$(eval ITEMS_1!=seq $(START) $(STEP) $(END_1))
$(foreach item,$(ITEMS_1),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_3_dv_proflik_jobs;)
robustness_3_dv_proflik.RDS: plot_dv_example.R robustness_3_dv_proflik.feather summarize_estimator.R
rm -f $@
${srun} Rscript $< --infile $(word 2, $^) --name "robustness_3_dv_proflik" --remember-file $@
robustness_3_dv_jobs_p1: grid_sweep.py 03_depvar.R simulation_base.R
rm -f $@
${srun} ./$< --command 'Rscript 03_depvar.R' --arg_dict '{"N":${robustness_Ns},"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_3_dv.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[1],"B0":[0,0.405], "outcome_formula":["y~x+z"], "prediction_accuracy":[0.85]}' --outfile $@
robustness_3_dv_jobs_p2: grid_sweep.py 03_depvar.R simulation_base.R
rm -f $@
${srun} ./$< --command 'Rscript 03_depvar.R' --arg_dict '{"N":${robustness_Ns},"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_3_dv.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[1],"B0":[0.847,1.386], "outcome_formula":["y~x+z"], "prediction_accuracy":[0.85]}' --outfile $@
robustness_3_dv_jobs_p3: grid_sweep.py 03_depvar.R simulation_base.R
rm -f $@
${srun} ./$< --command 'Rscript 03_depvar.R' --arg_dict '{"N":${robustness_Ns},"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_3_dv.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[1], "B0":[2.197,2.944], "outcome_formula":["y~x+z"], "prediction_accuracy":[0.85]}' --outfile $@
robustness_3_dv.feather: robustness_3_dv_jobs_p1 robustness_3_dv_jobs_p2 robustness_3_dv_jobs_p3
rm -f $@
$(eval END_1!=cat robustness_3_dv_jobs_p1 | wc -l)
$(eval ITEMS_1!=seq $(START) $(STEP) $(END_1))
$(eval END_2!=cat robustness_3_dv_jobs_p2 | wc -l)
$(eval ITEMS_2!=seq $(START) $(STEP) $(END_2))
$(eval END_3!=cat robustness_3_dv_jobs_p3 | wc -l)
$(eval ITEMS_3!=seq $(START) $(STEP) $(END_3))
$(foreach item,$(ITEMS_1),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_3_dv_jobs_p1;)
$(foreach item,$(ITEMS_2),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_3_dv_jobs_p2;)
$(foreach item,$(ITEMS_3),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_3_dv_jobs_p3;)
robustness_3_dv.RDS: plot_dv_example.R robustness_3_dv.feather summarize_estimator.R
rm -f $@
${srun} Rscript $< --infile $(word 2, $^) --name "robustness_3_dv" --remember-file $@
# example_6_jobs: 06_irr_dv.R irr_dv_simulation_base.R grid_sweep.py pl_methods.R robustness_4_jobs_p1: grid_sweep.py 02_indep_differential.R simulation_base.R
# sbatch --wait --verbose run_job.sbatch grid_sweep.py --command "Rscript 06_irr_dv.R" --arg_dict '{"N":${irr_Ns},"m":${irr_ms}, "seed":${irr_seeds}, "outfile":["example_6.feather"], "y_explained_variance":${irr_explained_variances},"coder_accuracy":${irr_coder_accuracy}}' --outfile example_6_jobs rm -f $@
${srun} ./$< --command 'Rscript 02_indep_differential.R' --arg_dict '{"N":${robustness_Ns},"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_4.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[1], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y+x+z"], "truth_formula":["x~z"], "y_bias":[-2.944,-2.197]}' --outfile $@
# example_6.feather:example_6_jobs robustness_4_jobs_p2: grid_sweep.py 02_indep_differential.R simulation_base.R
# rm -f example_6.feather rm -f $@
# sbatch --wait --verbose --array=1-1000 run_simulation.sbatch 0 example_6_jobs ${srun} ./$< --command 'Rscript 02_indep_differential.R' --arg_dict '{"N":${robustness_Ns},"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_4.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[1], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y+x+z"], "truth_formula":["x~z"], "y_bias":[-1.386,-0.846]}' --outfile $@
# sbatch --wait --verbose --array=1001-2000 run_simulation.sbatch 1000 example_6_jobs
# sbatch --wait --verbose --array=2001-$(shell cat example_6_jobs | wc -l) run_simulation.sbatch 2000 example_6_jobs robustness_4_jobs_p3: grid_sweep.py 02_indep_differential.R simulation_base.R
rm -f ./$@
${srun} ./$< --command 'Rscript 02_indep_differential.R' --arg_dict '{"N":${robustness_Ns},"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_4.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[1], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y+x+z"], "truth_formula":["x~z"], "y_bias":[-0.405,-0.25]}' --outfile $@
robustness_4_jobs_p4: grid_sweep.py 02_indep_differential.R simulation_base.R
rm -f ./$@
${srun} ./$< --command 'Rscript 02_indep_differential.R' --arg_dict '{"N":${robustness_Ns},"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_4.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[1], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y+x+z"], "truth_formula":["x~z"], "y_bias":[0,-0.1]}' --outfile $@
remember_irr.RDS: example_5.feather plot_irr_example.R plot_irr_dv_example.R summarize_estimator.R robustness_4.feather: robustness_4_jobs_p1 robustness_4_jobs_p2 robustness_4_jobs_p3
rm -f remember_irr.RDS rm -f $@
sbatch --wait --verbose run_job.sbatch Rscript plot_irr_example.R --infile example_5.feather --name "plot.df.example.5" $(eval END_1!=cat robustness_4_jobs_p1 | wc -l)
# sbatch --wait --verbose run_job.sbatch Rscript plot_irr_dv_example.R --infile example_6.feather --name "plot.df.example.6" $(eval ITEMS_1!=seq $(START) $(STEP) $(END_1))
$(eval END_2!=cat robustness_4_jobs_p2 | wc -l)
$(eval ITEMS_2!=seq $(START) $(STEP) $(END_2))
$(eval END_3!=cat robustness_4_jobs_p3 | wc -l)
$(eval ITEMS_3!=seq $(START) $(STEP) $(END_3))
$(eval END_4!=cat robustness_4_jobs_p3 | wc -l)
$(eval ITEMS_3!=seq $(START) $(STEP) $(END_4))
$(foreach item,$(ITEMS_1),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_4_jobs_p1;)
robustness_1_jobs: 02_indep_differential.R simulation_base.R grid_sweep.py $(foreach item,$(ITEMS_2),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_4_jobs_p2;)
sbatch --wait --verbose run_job.sbatch grid_sweep.py --command "Rscript 02_indep_differential.R" --arg_dict '{"N":${Ns},"m":${ms}, "seed":${seeds}, "outfile":["robustness_1.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[0.3], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y+x"], "truth_formula":["x~1"]}' --outfile robustness_1_jobs $(foreach item,$(ITEMS_3),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_4_jobs_p3;)
$(foreach item,$(ITEMS_4),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_4_jobs_p3;)
robustness_4.RDS: plot_example.R robustness_4.feather summarize_estimator.R
robustness_1.feather: robustness_1_jobs rm -f $@
rm -f robustness_1.feather ${srun} Rscript $< --infile $(word 2, $^) --name "robustness_4" --remember-file $@
sbatch --wait --verbose --array=1-$(shell cat robustness_1_jobs | wc -l) run_simulation.sbatch 0 robustness_1_jobs
robustness_1_dv_jobs: simulation_base.R 04_depvar_differential.R grid_sweep.py
${srun} bash -c "source ~/.bashrc && grid_sweep.py --command 'Rscript 04_depvar_differential.R' --arg_dict \"{'N':${Ns},'m':${ms}, 'seed':${seeds}, 'outfile':['robustness_1_dv.feather'], 'y_explained_variance':${explained_variances}, 'proxy_formula':['w_pred~y']}\" --outfile robustness_1_dv_jobs"
robustness_1_dv.feather: robustness_1_dv_jobs # '{"N":${robustness_Ns},"Bxy":[0.7],"Bzy":[-0.7],"m":${ms}, "seed":${seeds}, "outfile":["example_4.feather"], "z_bias":[0.5]}' --example_4_jobs
rm -f robustness_1_dv.feather
sbatch --wait --verbose --array=1-$(shell cat example_3_jobs | wc -l) run_simulation.sbatch 0 robustness_1_dv_jobs robustness_4_dv_jobs_p1: grid_sweep.py 04_depvar_differential.R simulation_base.R
rm -f $@
${srun} ./$< --command 'Rscript 04_depvar_differential.R' --arg_dict '{"N":${robustness_Ns},"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_4_dv.feather"], "Bzy":[-0.7],"Bxy":[0.7],"Bzx":[1], "outcome_formula":["y~x+z"], "z_bias":[0,0.1]}' --outfile $@
robustness_4_dv_jobs_p2: grid_sweep.py 04_depvar_differential.R simulation_base.R
rm -f $@
${srun} ./$< --command 'Rscript 04_depvar_differential.R' --arg_dict '{"N":${robustness_Ns},"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_4_dv.feather"], "Bzy":[-0.7],"Bxy":[0.7],"Bzx":[1], "outcome_formula":["y~x+z"], "z_bias":[0.25,0.405]}' --outfile $@
robustness_4_dv_jobs_p3: grid_sweep.py 04_depvar_differential.R simulation_base.R
rm -f $@
${srun} ./$< --command 'Rscript 04_depvar_differential.R' --arg_dict '{"N":${robustness_Ns},"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_4_dv.feather"], "Bzy":[-0.7],"Bxy":[0.7],"Bzx":[1],"outcome_formula":["y~x+z"],"z_bias":[0.846,1.386]}' --outfile $@
robustness_4_dv_jobs_p4: grid_sweep.py 04_depvar_differential.R simulation_base.R
rm -f $@
${srun} ./$< --command 'Rscript 04_depvar_differential.R' --arg_dict '{"N":${robustness_Ns},"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_4_dv.feather"],"Bzy":[-0.7],"Bxy":[0.7],"Bzx":[1], "outcome_formula":["y~x+z"], "z_bias":[2.197,2.944]}' --outfile $@
robustness_4_dv.feather: robustness_4_dv_jobs_p1 robustness_4_dv_jobs_p2 robustness_4_dv_jobs_p3 robustness_4_dv_jobs_p4
rm -f $@
$(eval END_1!=cat robustness_4_dv_jobs_p1 | wc -l)
$(eval ITEMS_1!=seq $(START) $(STEP) $(END_1))
$(eval END_2!=cat robustness_4_dv_jobs_p2 | wc -l)
$(eval ITEMS_2!=seq $(START) $(STEP) $(END_2))
$(eval END_3!=cat robustness_4_dv_jobs_p3 | wc -l)
$(eval ITEMS_3!=seq $(START) $(STEP) $(END_3))
$(eval END_4!=cat robustness_4_dv_jobs_p4 | wc -l)
$(eval ITEMS_4!=seq $(START) $(STEP) $(END_4))
$(foreach item,$(ITEMS_1),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_4_dv_jobs_p1;)
$(foreach item,$(ITEMS_2),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_4_dv_jobs_p2;)
$(foreach item,$(ITEMS_3),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_4_dv_jobs_p3;)
$(foreach item,$(ITEMS_4),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_4_dv_jobs_p4;)
remember_robustness_misspec.RDS: robustness_1.feather robustness_1_dv.feather robustness_4_dv.RDS: plot_dv_example.R robustness_4_dv.feather summarize_estimator.R
rm -f remember_robustness_misspec.RDS rm -f $@
sbatch --wait --verbose run_job.sbatch Rscript plot_example.R --infile robustness_1.feather --name "plot.df.robustness.1" --remember-file "remember_robustness_misspec.RDS" ${srun} Rscript $< --infile $(word 2, $^) --name "robustness_4_dv" --remember-file $@
sbatch --wait --verbose run_job.sbatch Rscript plot_dv_example.R --infile robustness_1_dv.feather --name "plot.df.robustness.1.dv" --remember-file "remember_robustness_mispec.RDS"
clean: robustness_5_jobs_p1: grid_sweep.py 02_indep_differential.R simulation_base.R
rm -f $@
${srun} ./$< --command 'Rscript 02_indep_differential.R' --arg_dict '{"N":${robustness_Ns},"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_5.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[1.386,2.197], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y+x+z"], "truth_formula":["x~z"]}' --outfile $@
# Robustness check 5 job lists (independent-variable version), parts 2-4.
# Each target regenerates its job-list file from scratch: grid_sweep.py expands
# the --arg_dict grid into one 'Rscript 02_indep_differential.R ...' command
# per line, to be consumed later by slurm array jobs.
# The Bzx sweep is split across the parts to keep each slurm array small:
# p2 covers Bzx in {0.405, 0.846}.
robustness_5_jobs_p2: grid_sweep.py 02_indep_differential.R simulation_base.R
rm -f $@
${srun} ./$< --command 'Rscript 02_indep_differential.R' --arg_dict '{"N":${robustness_Ns},"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_5.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[0.405,0.846], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y+x+z"], "truth_formula":["x~z"]}' --outfile $@
# p3 covers Bzx in {0, 0.25}.
robustness_5_jobs_p3: grid_sweep.py 02_indep_differential.R simulation_base.R
rm -f $@
${srun} ./$< --command 'Rscript 02_indep_differential.R' --arg_dict '{"N":${robustness_Ns},"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_5.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[0,0.25], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y+x+z"], "truth_formula":["x~z"]}' --outfile $@
# p4 covers Bzx = 2.944.
robustness_5_jobs_p4: grid_sweep.py 02_indep_differential.R simulation_base.R
rm -f $@
${srun} ./$< --command 'Rscript 02_indep_differential.R' --arg_dict '{"N":${robustness_Ns},"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_5.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[2.944], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y+x+z"], "truth_formula":["x~z"]}' --outfile $@
# Aggregate target for robustness check 5: count the commands in each job-list
# file, then submit them in STEP-sized slurm array batches; every job appends
# its simulation results into robustness_5.feather.
# Fix: the recipe reads robustness_5_jobs_p4 (END_4/ITEMS_4 below, and the
# final $(foreach) submits its jobs) but p4 was missing from the prerequisite
# list, so make could run this rule before that job list existed or was up to
# date. It is now a proper prerequisite like p1-p3.
robustness_5.feather: robustness_5_jobs_p1 robustness_5_jobs_p2 robustness_5_jobs_p3 robustness_5_jobs_p4
	rm -f $@
	$(eval END_1!=cat robustness_5_jobs_p1 | wc -l)
	$(eval ITEMS_1!=seq $(START) $(STEP) $(END_1))
	$(eval END_2!=cat robustness_5_jobs_p2 | wc -l)
	$(eval ITEMS_2!=seq $(START) $(STEP) $(END_2))
	$(eval END_3!=cat robustness_5_jobs_p3 | wc -l)
	$(eval ITEMS_3!=seq $(START) $(STEP) $(END_3))
	$(eval END_4!=cat robustness_5_jobs_p4 | wc -l)
	$(eval ITEMS_4!=seq $(START) $(STEP) $(END_4))
	$(foreach item,$(ITEMS_1),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_5_jobs_p1;)
	$(foreach item,$(ITEMS_2),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_5_jobs_p2;)
	$(foreach item,$(ITEMS_3),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_5_jobs_p3;)
	$(foreach item,$(ITEMS_4),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_5_jobs_p4;)
# Summarize robustness check 5: plot_example.R ($<) reads the feather file
# (second prerequisite) and writes the remembered summary to robustness_5.RDS.
robustness_5.RDS: plot_example.R robustness_5.feather summarize_estimator.R
rm -f $@
${srun} Rscript $< --infile $(word 2, $^) --name "robustness_5" --remember-file $@
# '{"N":${robustness_Ns},"Bxy":[0.7],"Bzy":[-0.7],"m":${ms}, "seed":${seeds}, "outfile":["example_4.feather"], "z_bias":[0.5]}' --example_4_jobs
# Robustness check 5 job lists, dependent-variable version: sweep Bzx with a
# fixed differential-error bias (z_bias = -0.5), split into four parts so each
# slurm array stays small. grid_sweep.py writes one command per grid point.
# p1: Bzx in {0, 0.25}.
robustness_5_dv_jobs_p1: grid_sweep.py 04_depvar_differential.R simulation_base.R
rm -f $@
${srun} ./$< --command 'Rscript 04_depvar_differential.R' --arg_dict '{"N":${robustness_Ns},"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_5_dv.feather"], "Bzy":[-0.7],"Bxy":[0.7],"Bzx":[0,0.25], "outcome_formula":["y~x+z"], "z_bias":[-0.5]}' --outfile $@
# p2: Bzx in {0.405, 0.846}.
robustness_5_dv_jobs_p2: grid_sweep.py 04_depvar_differential.R simulation_base.R
rm -f $@
${srun} ./$< --command 'Rscript 04_depvar_differential.R' --arg_dict '{"N":${robustness_Ns},"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_5_dv.feather"], "Bzy":[-0.7],"Bxy":[0.7],"Bzx":[0.405,0.846], "outcome_formula":["y~x+z"], "z_bias":[-0.5]}' --outfile $@
# p3: Bzx in {1.386, 2.197}.
robustness_5_dv_jobs_p3: grid_sweep.py 04_depvar_differential.R simulation_base.R
rm -f $@
${srun} ./$< --command 'Rscript 04_depvar_differential.R' --arg_dict '{"N":${robustness_Ns},"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_5_dv.feather"], "Bzy":[-0.7],"Bxy":[0.7],"Bzx":[1.386,2.197],"outcome_formula":["y~x+z"], "z_bias":[-0.5]}' --outfile $@
# p4: Bzx = 2.944.
robustness_5_dv_jobs_p4: grid_sweep.py 04_depvar_differential.R simulation_base.R
rm -f $@
${srun} ./$< --command 'Rscript 04_depvar_differential.R' --arg_dict '{"N":${robustness_Ns},"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_5_dv.feather"],"Bzy":[-0.7],"Bxy":[0.7],"Bzx":[2.944], "outcome_formula":["y~x+z"], "z_bias":[-0.5]}' --outfile $@
# Aggregate target for robustness check 5 (DV version): count the commands in
# each of the four job lists, then submit them in STEP-sized slurm array
# batches. Every job appends its results into robustness_5_dv.feather.
# Unlike robustness_5.feather, all four job-list parts are declared as
# prerequisites here, matching what the recipe reads.
robustness_5_dv.feather: robustness_5_dv_jobs_p1 robustness_5_dv_jobs_p2 robustness_5_dv_jobs_p3 robustness_5_dv_jobs_p4
rm -f $@
$(eval END_1!=cat robustness_5_dv_jobs_p1 | wc -l)
$(eval ITEMS_1!=seq $(START) $(STEP) $(END_1))
$(eval END_2!=cat robustness_5_dv_jobs_p2 | wc -l)
$(eval ITEMS_2!=seq $(START) $(STEP) $(END_2))
$(eval END_3!=cat robustness_5_dv_jobs_p3 | wc -l)
$(eval ITEMS_3!=seq $(START) $(STEP) $(END_3))
$(eval END_4!=cat robustness_5_dv_jobs_p4 | wc -l)
$(eval ITEMS_4!=seq $(START) $(STEP) $(END_4))
$(foreach item,$(ITEMS_1),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_5_dv_jobs_p1;)
$(foreach item,$(ITEMS_2),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_5_dv_jobs_p2;)
$(foreach item,$(ITEMS_3),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_5_dv_jobs_p3;)
$(foreach item,$(ITEMS_4),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_5_dv_jobs_p4;)
# Summarize robustness check 5 (DV version): plot_dv_example.R ($<) reads the
# feather file (second prerequisite) and writes the remembered summary RDS.
robustness_5_dv.RDS: plot_dv_example.R robustness_5_dv.feather summarize_estimator.R
rm -f $@
${srun} Rscript $< --infile $(word 2, $^) --name "robustness_5_dv" --remember-file $@
# Remove the outputs of the four main examples (remembered results, job lists,
# and simulation feather files) so the core analysis rebuilds from scratch.
# Robustness-check outputs are left alone; see clean_all for those.
clean_main:
rm -f remembr.RDS
rm -f example_1_jobs
rm -f example_2_jobs
rm -f example_3_jobs
rm -f example_4_jobs
rm -f example_1.feather
rm -f example_2.feather
rm -f example_3.feather
rm -f example_4.feather
#
clean_all:
rm *.feather rm *.feather
rm -f remembr.RDS rm -f remembr.RDS
rm -f remembr*.RDS
rm -f robustness*.RDS
rm -f example_*_jobs rm -f example_*_jobs
rm -f robustness_*_jobs_*
# sbatch --wait --verbose --array=3001-6001 run_simulation.sbatch 0 example_2_B_jobs # sbatch --wait --verbose --array=3001-6001 run_simulation.sbatch 0 example_2_B_jobs
# example_2_B_mecor_jobs: # example_2_B_mecor_jobs:
@@ -159,6 +484,44 @@ clean:
# sbatch --wait --verbose --array=1-3000 run_simulation.sbatch 0 example_2_B_mecor_jobs # sbatch --wait --verbose --array=1-3000 run_simulation.sbatch 0 example_2_B_mecor_jobs
# sbatch --wait --verbose --array=3001-6001 run_simulation.sbatch 0 example_2_B_mecor_jobs # sbatch --wait --verbose --array=3001-6001 run_simulation.sbatch 0 example_2_B_mecor_jobs
# Robustness check 6 job lists: simulations with non-normal outcome noise
# (03_indep_differential_nonnorm.R), sweeping the mixture-noise scale
# sd_y_mixin. p1/p2 use the script's default y_bias; p3/p4 repeat the same
# sd_y_mixin grids with y_bias fixed at 0.
# p1: sd_y_mixin in {0, 1, 2.5}.
robustness_6_jobs_p1: grid_sweep.py 03_indep_differential_nonnorm.R simulation_base.R
rm -f $@
${srun} ./$< --command 'Rscript 03_indep_differential_nonnorm.R' --arg_dict '{"N":${robustness_Ns},"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_6.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y+x+z"], "truth_formula":["x~z"],"sd_y_mixin":[0,1,2.5]}' --outfile $@
# p2: sd_y_mixin in {5, 10}.
robustness_6_jobs_p2: grid_sweep.py 03_indep_differential_nonnorm.R simulation_base.R
rm -f $@
${srun} ./$< --command 'Rscript 03_indep_differential_nonnorm.R' --arg_dict '{"N":${robustness_Ns},"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_6.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y+x+z"], "truth_formula":["x~z"],"sd_y_mixin":[5,10]}' --outfile $@
# p3: sd_y_mixin in {0, 1, 2.5} with y_bias = 0.
robustness_6_jobs_p3: grid_sweep.py 03_indep_differential_nonnorm.R simulation_base.R
rm -f $@
${srun} ./$< --command 'Rscript 03_indep_differential_nonnorm.R' --arg_dict '{"N":${robustness_Ns},"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_6.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y+x+z"], "truth_formula":["x~z"],"sd_y_mixin":[0,1,2.5],"y_bias":[0]}' --outfile $@
# p4: sd_y_mixin in {5, 10} with y_bias = 0.
robustness_6_jobs_p4: grid_sweep.py 03_indep_differential_nonnorm.R simulation_base.R
rm -f $@
${srun} ./$< --command 'Rscript 03_indep_differential_nonnorm.R' --arg_dict '{"N":${robustness_Ns},"m":${robustness_ms}, "seed":${seeds}, "outfile":["robustness_6.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y+x+z"], "truth_formula":["x~z"],"sd_y_mixin":[5,10],"y_bias":[0]}' --outfile $@
# Aggregate target for robustness check 6. All four job lists are prerequisites
# and their lengths are computed, but NOTE(review): the submission lines for
# p1-p3 are commented out below, so only the p4 batches are actually run.
# This looks like a leftover from a partial rerun -- confirm before using this
# rule for a clean build (deleting robustness_6.feather first would otherwise
# lose the p1-p3 results).
robustness_6.feather: robustness_6_jobs_p1 robustness_6_jobs_p2 robustness_6_jobs_p3 robustness_6_jobs_p4
rm -f $@
$(eval END_1!=cat robustness_6_jobs_p1 | wc -l)
$(eval ITEMS_1!=seq $(START) $(STEP) $(END_1))
$(eval END_2!=cat robustness_6_jobs_p2 | wc -l)
$(eval ITEMS_2!=seq $(START) $(STEP) $(END_2))
$(eval END_3!=cat robustness_6_jobs_p3 | wc -l)
$(eval ITEMS_3!=seq $(START) $(STEP) $(END_3))
$(eval END_4!=cat robustness_6_jobs_p4 | wc -l)
$(eval ITEMS_4!=seq $(START) $(STEP) $(END_4))
# $(foreach item,$(ITEMS_1),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_6_jobs_p1;)
# $(foreach item,$(ITEMS_2),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_6_jobs_p2;)
# $(foreach item,$(ITEMS_3),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_6_jobs_p3;)
$(foreach item,$(ITEMS_4),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_6_jobs_p4;)
# Summarize robustness check 6: plot_example.R ($<) reads the feather file
# (second prerequisite) and writes the remembered summary to robustness_6.RDS.
robustness_6.RDS: plot_example.R robustness_6.feather summarize_estimator.R
rm -f $@
${srun} Rscript $< --infile $(word 2, $^) --name "robustness_6" --remember-file $@
.PHONY: supplement .PHONY: supplement

View File

@@ -5,6 +5,7 @@ from itertools import product
import pyRemembeR import pyRemembeR
def main(command, arg_dict, outfile, remember_file='remember_grid_sweep.RDS'): def main(command, arg_dict, outfile, remember_file='remember_grid_sweep.RDS'):
print(remember_file)
remember = pyRemembeR.remember.Remember() remember = pyRemembeR.remember.Remember()
remember.set_file(remember_file) remember.set_file(remember_file)
remember[outfile] = arg_dict remember[outfile] = arg_dict

View File

@@ -15,18 +15,38 @@ library(bbmle)
### ideal formulas for example 2 ### ideal formulas for example 2
# test.fit.2 <- measerr_mle(df, y ~ x + z, gaussian(), w_pred ~ x + z + y + y:x, binomial(link='logit'), x ~ z) # test.fit.2 <- measerr_mle(df, y ~ x + z, gaussian(), w_pred ~ x + z + y + y:x, binomial(link='logit'), x ~ z)
likelihood.logistic <- function(model.params, outcome, model.matrix){
ll <- vector(mode='numeric', length=length(outcome))
ll[outcome == 1] <- plogis(model.params %*% t(model.matrix[outcome==1,]), log=TRUE)
ll[outcome == 0] <- plogis(model.params %*% t(model.matrix[outcome==0,]), log=TRUE, lower.tail=FALSE)
return(ll)
}
## outcome_formula <- y ~ x + z; proxy_formula <- w_pred ~ y + x + z + x:z + x:y + z:y ## outcome_formula <- y ~ x + z; proxy_formula <- w_pred ~ y + x + z + x:z + x:y + z:y
measerr_mle_dv <- function(df, outcome_formula, outcome_family=binomial(link='logit'), proxy_formula, proxy_family=binomial(link='logit'),method='optim'){ measerr_mle_dv <- function(df, outcome_formula, outcome_family=binomial(link='logit'), proxy_formula, proxy_family=binomial(link='logit'),maxit=1e6, method='optim',optim_method='L-BFGS-B'){
df.obs <- model.frame(outcome_formula, df)
proxy.model.matrix <- model.matrix(proxy_formula, df)
proxy.variable <- all.vars(proxy_formula)[1]
df.proxy.obs <- model.frame(proxy_formula,df)
proxy.obs <- with(df.proxy.obs, eval(parse(text=proxy.variable)))
response.var <- all.vars(outcome_formula)[1]
y.obs <- with(df.obs,eval(parse(text=response.var)))
outcome.model.matrix <- model.matrix(outcome_formula, df.obs)
df.unobs <- df[is.na(df[[response.var]])]
df.unobs.y1 <- copy(df.unobs)
df.unobs.y1[[response.var]] <- 1
df.unobs.y0 <- copy(df.unobs)
df.unobs.y0[[response.var]] <- 0
outcome.model.matrix.y1 <- model.matrix(outcome_formula, df.unobs.y1)
proxy.model.matrix.y1 <- model.matrix(proxy_formula, df.unobs.y1)
proxy.model.matrix.y0 <- model.matrix(proxy_formula, df.unobs.y0)
proxy.unobs <- with(df.unobs, eval(parse(text=proxy.variable)))
nll <- function(params){ nll <- function(params){
df.obs <- model.frame(outcome_formula, df)
proxy.variable <- all.vars(proxy_formula)[1]
proxy.model.matrix <- model.matrix(proxy_formula, df)
response.var <- all.vars(outcome_formula)[1]
y.obs <- with(df.obs,eval(parse(text=response.var)))
outcome.model.matrix <- model.matrix(outcome_formula, df.obs)
param.idx <- 1 param.idx <- 1
n.outcome.model.covars <- dim(outcome.model.matrix)[2] n.outcome.model.covars <- dim(outcome.model.matrix)[2]
@@ -39,12 +59,9 @@ measerr_mle_dv <- function(df, outcome_formula, outcome_family=binomial(link='lo
ll.y.obs[y.obs==0] <- plogis(outcome.params %*% t(outcome.model.matrix[y.obs==0,]),log=TRUE,lower.tail=FALSE) ll.y.obs[y.obs==0] <- plogis(outcome.params %*% t(outcome.model.matrix[y.obs==0,]),log=TRUE,lower.tail=FALSE)
} }
df.obs <- model.frame(proxy_formula,df)
n.proxy.model.covars <- dim(proxy.model.matrix)[2] n.proxy.model.covars <- dim(proxy.model.matrix)[2]
proxy.params <- params[param.idx:(n.proxy.model.covars+param.idx-1)] proxy.params <- params[param.idx:(n.proxy.model.covars+param.idx-1)]
param.idx <- param.idx + n.proxy.model.covars param.idx <- param.idx + n.proxy.model.covars
proxy.obs <- with(df.obs, eval(parse(text=proxy.variable)))
if( (proxy_family$family=="binomial") & (proxy_family$link=='logit')){ if( (proxy_family$family=="binomial") & (proxy_family$link=='logit')){
ll.w.obs <- vector(mode='numeric',length=dim(proxy.model.matrix)[1]) ll.w.obs <- vector(mode='numeric',length=dim(proxy.model.matrix)[1])
@@ -54,14 +71,7 @@ measerr_mle_dv <- function(df, outcome_formula, outcome_family=binomial(link='lo
ll.obs <- sum(ll.y.obs + ll.w.obs) ll.obs <- sum(ll.y.obs + ll.w.obs)
df.unobs <- df[is.na(df[[response.var]])]
df.unobs.y1 <- copy(df.unobs)
df.unobs.y1[[response.var]] <- 1
df.unobs.y0 <- copy(df.unobs)
df.unobs.y0[[response.var]] <- 0
## integrate out y ## integrate out y
outcome.model.matrix.y1 <- model.matrix(outcome_formula, df.unobs.y1)
if((outcome_family$family == "binomial") & (outcome_family$link == 'logit')){ if((outcome_family$family == "binomial") & (outcome_family$link == 'logit')){
ll.y.unobs.1 <- vector(mode='numeric', length=dim(outcome.model.matrix.y1)[1]) ll.y.unobs.1 <- vector(mode='numeric', length=dim(outcome.model.matrix.y1)[1])
@@ -70,10 +80,6 @@ measerr_mle_dv <- function(df, outcome_formula, outcome_family=binomial(link='lo
ll.y.unobs.0 <- plogis(outcome.params %*% t(outcome.model.matrix.y1),log=TRUE,lower.tail=FALSE) ll.y.unobs.0 <- plogis(outcome.params %*% t(outcome.model.matrix.y1),log=TRUE,lower.tail=FALSE)
} }
proxy.model.matrix.y1 <- model.matrix(proxy_formula, df.unobs.y1)
proxy.model.matrix.y0 <- model.matrix(proxy_formula, df.unobs.y0)
proxy.unobs <- with(df.unobs, eval(parse(text=proxy.variable)))
if( (proxy_family$family=="binomial") & (proxy_family$link=='logit')){ if( (proxy_family$family=="binomial") & (proxy_family$link=='logit')){
ll.w.unobs.1 <- vector(mode='numeric',length=dim(proxy.model.matrix.y1)[1]) ll.w.unobs.1 <- vector(mode='numeric',length=dim(proxy.model.matrix.y1)[1])
ll.w.unobs.0 <- vector(mode='numeric',length=dim(proxy.model.matrix.y0)[1]) ll.w.unobs.0 <- vector(mode='numeric',length=dim(proxy.model.matrix.y0)[1])
@@ -100,7 +106,7 @@ measerr_mle_dv <- function(df, outcome_formula, outcome_family=binomial(link='lo
names(start) <- params names(start) <- params
if(method=='optim'){ if(method=='optim'){
fit <- optim(start, fn = nll, lower=lower, method='L-BFGS-B', hessian=TRUE, control=list(maxit=1e6)) fit <- optim(start, fn = nll, lower=lower, method=optim_method, hessian=TRUE, control=list(maxit=maxit))
} else { } else {
quoted.names <- gsub("[\\(\\)]",'',names(start)) quoted.names <- gsub("[\\(\\)]",'',names(start))
print(quoted.names) print(quoted.names)
@@ -109,13 +115,13 @@ measerr_mle_dv <- function(df, outcome_formula, outcome_family=binomial(link='lo
measerr_mle_nll <- eval(parse(text=text)) measerr_mle_nll <- eval(parse(text=text))
names(start) <- quoted.names names(start) <- quoted.names
names(lower) <- quoted.names names(lower) <- quoted.names
fit <- mle2(minuslogl=measerr_mle_nll, start=start, lower=lower, parnames=params,control=list(maxit=1e6),method='L-BFGS-B') fit <- mle2(minuslogl=measerr_mle_nll, start=start, lower=lower, parnames=params,control=list(maxit=maxit),method=optim_method)
} }
return(fit) return(fit)
} }
measerr_mle <- function(df, outcome_formula, outcome_family=gaussian(), proxy_formula, proxy_family=binomial(link='logit'), truth_formula, truth_family=binomial(link='logit'),method='optim'){ measerr_mle <- function(df, outcome_formula, outcome_family=gaussian(), proxy_formula, proxy_family=binomial(link='logit'), truth_formula, truth_family=binomial(link='logit'),method='optim', maxit=1e6, optim_method='L-BFGS-B'){
df.obs <- model.frame(outcome_formula, df) df.obs <- model.frame(outcome_formula, df)
response.var <- all.vars(outcome_formula)[1] response.var <- all.vars(outcome_formula)[1]
@@ -125,6 +131,31 @@ measerr_mle <- function(df, outcome_formula, outcome_family=gaussian(), proxy_fo
proxy.model.matrix <- model.matrix(proxy_formula, df) proxy.model.matrix <- model.matrix(proxy_formula, df)
y.obs <- with(df.obs,eval(parse(text=response.var))) y.obs <- with(df.obs,eval(parse(text=response.var)))
df.proxy.obs <- model.frame(proxy_formula,df)
proxy.obs <- with(df.proxy.obs, eval(parse(text=proxy.variable)))
n.proxy.model.covars <- dim(proxy.model.matrix)[2]
df.truth.obs <- model.frame(truth_formula, df)
truth.obs <- with(df.truth.obs, eval(parse(text=truth.variable)))
truth.model.matrix <- model.matrix(truth_formula,df.truth.obs)
n.truth.model.covars <- dim(truth.model.matrix)[2]
df.unobs <- df[is.na(eval(parse(text=truth.variable)))]
df.unobs.x1 <- copy(df.unobs)
df.unobs.x1[,truth.variable] <- 1
df.unobs.x0 <- copy(df.unobs)
df.unobs.x0[,truth.variable] <- 0
outcome.unobs <- with(df.unobs, eval(parse(text=response.var)))
outcome.model.matrix.x0 <- model.matrix(outcome_formula, df.unobs.x0)
outcome.model.matrix.x1 <- model.matrix(outcome_formula, df.unobs.x1)
proxy.model.matrix.x0 <- model.matrix(proxy_formula, df.unobs.x0)
proxy.model.matrix.x1 <- model.matrix(proxy_formula, df.unobs.x1)
proxy.unobs <- df.unobs[[proxy.variable]]
truth.model.matrix.unobs <- model.matrix(truth_formula, df.unobs.x0)
measerr_mle_nll <- function(params){ measerr_mle_nll <- function(params){
param.idx <- 1 param.idx <- 1
n.outcome.model.covars <- dim(outcome.model.matrix)[2] n.outcome.model.covars <- dim(outcome.model.matrix)[2]
@@ -137,82 +168,48 @@ measerr_mle <- function(df, outcome_formula, outcome_family=gaussian(), proxy_fo
param.idx <- param.idx + 1 param.idx <- param.idx + 1
# outcome_formula likelihood using linear regression # outcome_formula likelihood using linear regression
ll.y.obs <- dnorm(y.obs, outcome.params %*% t(outcome.model.matrix),sd=sigma.y, log=TRUE) ll.y.obs <- dnorm(y.obs, outcome.params %*% t(outcome.model.matrix),sd=sigma.y, log=TRUE)
} } else if( (outcome_family$family == "binomial") & (outcome_family$link == "logit") )
ll.y.obs <- likelihood.logistic(outcome.params, y.obs, outcome.model.matrix)
df.obs <- model.frame(proxy_formula,df)
n.proxy.model.covars <- dim(proxy.model.matrix)[2]
proxy.params <- params[param.idx:(n.proxy.model.covars+param.idx-1)] proxy.params <- params[param.idx:(n.proxy.model.covars+param.idx-1)]
param.idx <- param.idx + n.proxy.model.covars param.idx <- param.idx + n.proxy.model.covars
proxy.obs <- with(df.obs, eval(parse(text=proxy.variable)))
if( (proxy_family$family=="binomial") & (proxy_family$link=='logit')){ if( (proxy_family$family=="binomial") & (proxy_family$link=='logit'))
ll.w.obs <- vector(mode='numeric',length=dim(proxy.model.matrix)[1]) ll.w.obs <- likelihood.logistic(proxy.params, proxy.obs, proxy.model.matrix)
# proxy_formula likelihood using logistic regression
ll.w.obs[proxy.obs==1] <- plogis(proxy.params %*% t(proxy.model.matrix[proxy.obs==1,]),log=TRUE)
ll.w.obs[proxy.obs==0] <- plogis(proxy.params %*% t(proxy.model.matrix[proxy.obs==0,]),log=TRUE, lower.tail=FALSE)
}
df.obs <- model.frame(truth_formula, df)
truth.obs <- with(df.obs, eval(parse(text=truth.variable)))
truth.model.matrix <- model.matrix(truth_formula,df)
n.truth.model.covars <- dim(truth.model.matrix)[2]
truth.params <- params[param.idx:(n.truth.model.covars + param.idx - 1)] truth.params <- params[param.idx:(n.truth.model.covars + param.idx - 1)]
if( (truth_family$family=="binomial") & (truth_family$link=='logit')){ if( (truth_family$family=="binomial") & (truth_family$link=='logit'))
ll.x.obs <- vector(mode='numeric',length=dim(truth.model.matrix)[1]) ll.x.obs <- likelihood.logistic(truth.params, truth.obs, truth.model.matrix)
# truth_formula likelihood using logistic regression # add the three likelihoods
ll.x.obs[truth.obs==1] <- plogis(truth.params %*% t(truth.model.matrix[truth.obs==1,]),log=TRUE)
ll.x.obs[truth.obs==0] <- plogis(truth.params %*% t(truth.model.matrix[truth.obs==0,]),log=TRUE, lower.tail=FALSE)
}
# add the three likelihoods
ll.obs <- sum(ll.y.obs + ll.w.obs + ll.x.obs) ll.obs <- sum(ll.y.obs + ll.w.obs + ll.x.obs)
## likelihood for the predicted data ## likelihood for the predicted data
## integrate out the "truth" variable. ## integrate out the "truth" variable.
if(truth_family$family=='binomial'){ if(truth_family$family=='binomial'){
df.unobs <- df[is.na(eval(parse(text=truth.variable)))]
df.unobs.x1 <- copy(df.unobs)
df.unobs.x1[,'x'] <- 1
df.unobs.x0 <- copy(df.unobs)
df.unobs.x0[,'x'] <- 0
outcome.unobs <- with(df.unobs, eval(parse(text=response.var)))
outcome.model.matrix.x0 <- model.matrix(outcome_formula, df.unobs.x0)
outcome.model.matrix.x1 <- model.matrix(outcome_formula, df.unobs.x1)
if(outcome_family$family=="gaussian"){ if(outcome_family$family=="gaussian"){
# likelihood of outcome # likelihood of outcome
ll.y.x0 <- dnorm(outcome.unobs, outcome.params %*% t(outcome.model.matrix.x0), sd=sigma.y, log=TRUE) ll.y.x0 <- dnorm(outcome.unobs, outcome.params %*% t(outcome.model.matrix.x0), sd=sigma.y, log=TRUE)
ll.y.x1 <- dnorm(outcome.unobs, outcome.params %*% t(outcome.model.matrix.x1), sd=sigma.y, log=TRUE) ll.y.x1 <- dnorm(outcome.unobs, outcome.params %*% t(outcome.model.matrix.x1), sd=sigma.y, log=TRUE)
} else if( (outcome_family$family == "binomial") & (outcome_family$link == "logit") ){
ll.y.x1 <- likelihood.logistic(outcome.params, outcome.unobs, outcome.model.matrix.x1)
ll.y.x0 <- likelihood.logistic(outcome.params, outcome.unobs, outcome.model.matrix.x0)
} }
if( (proxy_family$family=='binomial') & (proxy_family$link=='logit')){ if( (proxy_family$family=='binomial') & (proxy_family$link=='logit')){
proxy.model.matrix.x0 <- model.matrix(proxy_formula, df.unobs.x0) ll.w.x0 <- likelihood.logistic(proxy.params, proxy.unobs, proxy.model.matrix.x0)
proxy.model.matrix.x1 <- model.matrix(proxy_formula, df.unobs.x1) ll.w.x1 <- likelihood.logistic(proxy.params, proxy.unobs, proxy.model.matrix.x1)
proxy.unobs <- df.unobs[[proxy.variable]]
ll.w.x0 <- vector(mode='numeric', length=dim(df.unobs)[1])
ll.w.x1 <- vector(mode='numeric', length=dim(df.unobs)[1])
# likelihood of proxy
ll.w.x0[proxy.unobs==1] <- plogis(proxy.params %*% t(proxy.model.matrix.x0[proxy.unobs==1,]), log=TRUE)
ll.w.x1[proxy.unobs==1] <- plogis(proxy.params %*% t(proxy.model.matrix.x1[proxy.unobs==1,]), log=TRUE)
ll.w.x0[proxy.unobs==0] <- plogis(proxy.params %*% t(proxy.model.matrix.x0[proxy.unobs==0,]), log=TRUE,lower.tail=FALSE)
ll.w.x1[proxy.unobs==0] <- plogis(proxy.params %*% t(proxy.model.matrix.x1[proxy.unobs==0,]), log=TRUE,lower.tail=FALSE)
} }
if(truth_family$link=='logit'){ if(truth_family$link=='logit'){
truth.model.matrix <- model.matrix(truth_formula, df.unobs.x0)
# likelihood of truth # likelihood of truth
ll.x.x1 <- plogis(truth.params %*% t(truth.model.matrix), log=TRUE) ll.x.x1 <- plogis(truth.params %*% t(truth.model.matrix.unobs), log=TRUE)
ll.x.x0 <- plogis(truth.params %*% t(truth.model.matrix), log=TRUE, lower.tail=FALSE) ll.x.x0 <- plogis(truth.params %*% t(truth.model.matrix.unobs), log=TRUE, lower.tail=FALSE)
} }
} }
@@ -243,7 +240,7 @@ measerr_mle <- function(df, outcome_formula, outcome_family=gaussian(), proxy_fo
names(start) <- params names(start) <- params
if(method=='optim'){ if(method=='optim'){
fit <- optim(start, fn = measerr_mle_nll, lower=lower, method='L-BFGS-B', hessian=TRUE, control=list(maxit=1e6)) fit <- optim(start, fn = measerr_mle_nll, lower=lower, method=optim_method, hessian=TRUE, control=list(maxit=maxit))
} else { # method='mle2' } else { # method='mle2'
quoted.names <- gsub("[\\(\\)]",'',names(start)) quoted.names <- gsub("[\\(\\)]",'',names(start))
@@ -253,7 +250,7 @@ measerr_mle <- function(df, outcome_formula, outcome_family=gaussian(), proxy_fo
measerr_mle_nll_mle <- eval(parse(text=text)) measerr_mle_nll_mle <- eval(parse(text=text))
names(start) <- quoted.names names(start) <- quoted.names
names(lower) <- quoted.names names(lower) <- quoted.names
fit <- mle2(minuslogl=measerr_mle_nll_mle, start=start, lower=lower, parnames=params,control=list(maxit=1e6),method='L-BFGS-B') fit <- mle2(minuslogl=measerr_mle_nll_mle, start=start, lower=lower, parnames=params,control=list(maxit=maxit),method=optim_method)
} }
return(fit) return(fit)
@@ -431,7 +428,7 @@ measerr_irr_mle <- function(df, outcome_formula, outcome_family=gaussian(), code
## Experimental, and does not work. ## Experimental, and does not work.
measerr_irr_mle_dv <- function(df, outcome_formula, outcome_family=binomial(link='logit'), coder_formulas=list(y.obs.0~y+w_pred+y.obs.1,y.obs.1~y+w_pred+y.obs.0), proxy_formula=w_pred~y, proxy_family=binomial(link='logit'),method='optim'){ measerr_irr_mle_dv <- function(df, outcome_formula, outcome_family=binomial(link='logit'), coder_formulas=list(y.obs.0~y+w_pred+y.obs.1,y.obs.1~y+w_pred+y.obs.0), proxy_formula=w_pred~y, proxy_family=binomial(link='logit'),method='optim'){
integrate.grid <- expand.grid(replicate(1 + length(coder_formulas), c(0,1), simplify=FALSE)) integrate.grid <- expand.grid(replicate(1 + length(coder_formulas), c(0,1), simplify=FALSE))
print(integrate.grid) # print(integrate.grid)
outcome.model.matrix <- model.matrix(outcome_formula, df) outcome.model.matrix <- model.matrix(outcome_formula, df)
@@ -527,8 +524,8 @@ measerr_irr_mle_dv <- function(df, outcome_formula, outcome_family=binomial(link
## likelihood of observed data ## likelihood of observed data
target <- -1 * sum(lls) target <- -1 * sum(lls)
print(target) # print(target)
print(params) # print(params)
return(target) return(target)
} }
} }

86
simulations/pl_methods.R Normal file
View File

@@ -0,0 +1,86 @@
library(stats4)
library(bbmle)
library(matrixStats)
## Maximum-likelihood misclassification correction for a binary dependent
## variable (Zhang-style estimator; see pl_methods.R header libraries).
## Fits a logistic outcome model y ~ x + z where the true label is observed
## only in a validation subset (column y.obs, NA elsewhere) and a classifier
## prediction w_pred is available for every row. The classifier's false
## positive / false negative rates, estimated from the validation rows, are
## used to integrate over the unknown labels in the unvalidated rows.
## Assumes `df` is a data.table with columns y.obs, w_pred, x, z (data.table
## scoping is used inside `[`) -- TODO confirm against callers.
## Returns the bbmle::mle2 fit object.
zhang.mle.dv <- function(df){
## split into validated (ground truth observed) and unvalidated rows
df.obs <- df[!is.na(y.obs)]
df.unobs <- df[is.na(y.obs)]
## classifier error rates estimated on the validation subset
fp <- df.obs[(w_pred==1) & (y.obs != w_pred),.N]
tn <- df.obs[(w_pred == 0) & (y.obs == w_pred),.N]
fpr <- fp / (fp+tn)
fn <- df.obs[(w_pred==0) & (y.obs != w_pred), .N]
tp <- df.obs[(w_pred==1) & (y.obs == w_pred),.N]
fnr <- fn / (fn+tp)
## negative log-likelihood in the logistic coefficients (B0, Bxy, Bzy)
nll <- function(B0=0, Bxy=0, Bzy=0){
## observed case
## log-likelihood of the validated labels under the logistic model
ll.y.obs <- vector(mode='numeric', length=nrow(df.obs))
ll.y.obs[df.obs$y.obs==1] <- with(df.obs[y.obs==1], plogis(B0 + Bxy * x + Bzy * z,log=T))
ll.y.obs[df.obs$y.obs==0] <- with(df.obs[y.obs==0], plogis(B0 + Bxy * x + Bzy * z,log=T,lower.tail=FALSE))
ll <- sum(ll.y.obs)
## unvalidated case: log P(y=1 | x, z) for each unvalidated row
pi.y.1 <- with(df.unobs,plogis(B0 + Bxy * x + Bzy*z, log=T))
#pi.y.0 <- with(df.unobs,plogis(B0 + Bxy * x + Bzy*z, log=T,lower.tail=FALSE))
## marginal log-likelihood of w_pred after integrating out the unknown y,
## combining fpr/fnr with pi.y.1 in log space via colLogSumExps.
## NOTE(review): the w_pred / (1-w_pred) multipliers select the branch by
## zeroing the other term rather than by indexing -- presumably intended
## since w_pred is 0/1 here; verify this matches the published estimator.
lls <- with(df.unobs, colLogSumExps(rbind(w_pred * colLogSumExps(rbind(log(fpr), log(1 - fnr - fpr)+pi.y.1)),
(1-w_pred) * (log(1-fpr) - exp(log(1-fnr-fpr)+pi.y.1)))))
ll <- ll + sum(lls)
# print(paste0(B0,Bxy,Bzy))
# print(ll)
return(-ll)
}
## box-constrained quasi-Newton optimization of the negative log-likelihood
mlefit <- mle2(minuslogl = nll, control=list(maxit=1e6),method='L-BFGS-B',lower=c(B0=-Inf, Bxy=-Inf, Bzy=-Inf),
upper=c(B0=Inf, Bxy=Inf, Bzy=Inf))
return(mlefit)
}
## model from Zhang's arxiv paper, with predictions for y
## Zhang got this model from Hausman 1998
zhang.mle.iv <- function(df){
    ## Misclassification-aware linear regression for a noisily-classified
    ## *independent* variable.
    ## df: data.table with columns x.obs (human label, NA where uncoded),
    ##     w_pred (classifier output, 0/1), y, z.
    ## Returns a bbmle::mle2 fit of y ~ B0 + Bxy*x + Bzy*z with residual sd sigma_y.

    ## Split the validation (human-coded) rows from classifier-only rows.
    df.obs <- df[!is.na(x.obs)]
    df.unobs <- df[is.na(x.obs)]

    ## Plug-in negative / positive predictive values from the validation set.
    tn <- df.obs[(w_pred == 0) & (x.obs == w_pred),.N]
    fn <- df.obs[(w_pred==0) & (x.obs==1), .N]
    npv <- tn / (tn + fn)
    tp <- df.obs[(w_pred==1) & (x.obs == w_pred),.N]
    fp <- df.obs[(w_pred==1) & (x.obs == 0),.N]
    ppv <- tp / (tp + fp)

    nll <- function(B0=0, Bxy=0, Bzy=0, sigma_y=9){
        ### Problem: accounting for uncertainty in ppv / npv

        ## Observed case: plain Gaussian log-likelihood on the coded rows.
        ll.y.obs <- with(df.obs, dnorm(y, B0 + Bxy * x + Bzy * z, sd=sigma_y, log=TRUE))
        ll <- sum(ll.y.obs)

        ## Unobserved case: integrate out x using ppv/npv as P(x | w_pred).
        ll.x.1 <- with(df.unobs, dnorm(y, B0 + Bxy + Bzy * z, sd = sigma_y, log=TRUE))
        ll.x.0 <- with(df.unobs, dnorm(y, B0 + Bzy * z, sd = sigma_y, log=TRUE))

        ## case w_pred == 1: mixture weighted by P(x=1|w=1)=ppv
        lls.x.1 <- colLogSumExps(rbind(log(ppv) + ll.x.1, log(1-ppv) + ll.x.0))
        ## case w_pred == 0: mixture weighted by P(x=1|w=0)=1-npv
        lls.x.0 <- colLogSumExps(rbind(log(1-npv) + ll.x.1, log(npv) + ll.x.0))

        ## Select the matching term per row (w_pred is 0/1).
        ## FIX(review): the previous colLogSumExps(rbind(w_pred*., (1-w_pred)*.))
        ## added exp(0)=1 for the unselected branch instead of dropping it.
        lls <- df.unobs$w_pred * lls.x.1 + (1-df.unobs$w_pred) * lls.x.0
        ll <- ll + sum(lls)

        ## FIX(review): mle2's minuslogl must be the *negative* log-likelihood;
        ## the previous version returned +ll (its final assignment), so the
        ## optimizer minimized the likelihood. Sibling zhang.mle.dv negates.
        return(-ll)
    }

    mlefit <- mle2(minuslogl = nll, control=list(maxit=1e6),
                   lower=list(sigma_y=0.00001, B0=-Inf, Bxy=-Inf, Bzy=-Inf),
                   upper=list(sigma_y=Inf, B0=Inf, Bxy=Inf, Bzy=Inf),
                   method='L-BFGS-B')
    return(mlefit)
}

View File

@@ -6,7 +6,7 @@ library(filelock)
library(argparser) library(argparser)
parser <- arg_parser("Simulate data and fit corrected models.") parser <- arg_parser("Simulate data and fit corrected models.")
parser <- add_argument(parser, "--infile", default="example_4.feather", help="name of the file to read.") parser <- add_argument(parser, "--infile", default="robustness_3_dv.feather", help="name of the file to read.")
parser <- add_argument(parser, "--remember-file", default="remembr.RDS", help="name of the remember file.") parser <- add_argument(parser, "--remember-file", default="remembr.RDS", help="name of the remember file.")
parser <- add_argument(parser, "--name", default="", help="The name to safe the data to in the remember file.") parser <- add_argument(parser, "--name", default="", help="The name to safe the data to in the remember file.")
args <- parse_args(parser) args <- parse_args(parser)
@@ -86,9 +86,6 @@ build_plot_dataset <- function(df){
change.remember.file(args$remember_file, clear=TRUE) change.remember.file(args$remember_file, clear=TRUE)
sims.df <- read_feather(args$infile) sims.df <- read_feather(args$infile)
sims.df[,Bzx:=NA]
sims.df[,y_explained_variance:=NA]
sims.df[,accuracy_imbalance_difference:=NA]
plot.df <- build_plot_dataset(sims.df) plot.df <- build_plot_dataset(sims.df)
remember(plot.df,args$name) remember(plot.df,args$name)
@@ -97,6 +94,7 @@ set.remember.prefix(gsub("plot.df.","",args$name))
remember(median(sims.df$cor.xz),'med.cor.xz') remember(median(sims.df$cor.xz),'med.cor.xz')
remember(median(sims.df$accuracy),'med.accuracy') remember(median(sims.df$accuracy),'med.accuracy')
remember(mean(sims.df$accuracy),'mean.accuracy')
remember(median(sims.df$error.cor.x),'med.error.cor.x') remember(median(sims.df$error.cor.x),'med.error.cor.x')
remember(median(sims.df$error.cor.z),'med.error.cor.z') remember(median(sims.df$error.cor.z),'med.error.cor.z')
remember(median(sims.df$lik.ratio),'med.lik.ratio') remember(median(sims.df$lik.ratio),'med.lik.ratio')

View File

@@ -9,7 +9,7 @@ source("summarize_estimator.R")
parser <- arg_parser("Simulate data and fit corrected models.") parser <- arg_parser("Simulate data and fit corrected models.")
parser <- add_argument(parser, "--infile", default="example_2.feather", help="name of the file to read.") parser <- add_argument(parser, "--infile", default="robustness_2.feather", help="name of the file to read.")
parser <- add_argument(parser, "--remember-file", default="remembr.RDS", help="name of the remember file.") parser <- add_argument(parser, "--remember-file", default="remembr.RDS", help="name of the remember file.")
parser <- add_argument(parser, "--name", default="", help="The name to safe the data to in the remember file.") parser <- add_argument(parser, "--name", default="", help="The name to safe the data to in the remember file.")
args <- parse_args(parser) args <- parse_args(parser)
@@ -47,7 +47,7 @@ args <- parse_args(parser)
## var.est = var(.SD[[paste0('B',coefname,'y.est.',suffix)]]), ## var.est = var(.SD[[paste0('B',coefname,'y.est.',suffix)]]),
## est.upper.95 = quantile(.SD[[paste0('B',coefname,'y.est.',suffix)]],0.95,na.rm=T), ## est.upper.95 = quantile(.SD[[paste0('B',coefname,'y.est.',suffix)]],0.95,na.rm=T),
## est.lower.95 = quantile(.SD[[paste0('B',coefname,'y.est.',suffix)]],0.05,na.rm=T), ## est.lower.95 = quantile(.SD[[paste0('B',coefname,'y.est.',suffix)]],0.05,na.rm=T),
## N.sims = .N, ## N.sims = .
## p.sign.correct = mean(as.integer(sign.correct & (! zero.in.ci))), ## p.sign.correct = mean(as.integer(sign.correct & (! zero.in.ci))),
## variable=coefname, ## variable=coefname,
## method=suffix ## method=suffix

View File

@@ -0,0 +1,39 @@
# robustness\_1.RDS
Tests how robust the MLE method for independent variables with differential error is when the model for $X$ is less precise. In the main paper, we include $Z$ on the right-hand-side of the `truth_formula`.
In this robustness check, the `truth_formula` is an intercept-only model.
The stats are in the list named `robustness_1` in the `.RDS`
# robustness\_1\_dv.RDS
Like `robustness_1.RDS` but with a less precise model for `w_pred`. In the main paper, we included $Z$ in the `proxy_formula`. In this robustness check, we do not.
# robustness_2.RDS
This is just example 1 with varying levels of classifier accuracy, indicated by the `prediction_accuracy` variable.
# robustness_2_dv.RDS
Example 3 with varying levels of classifier accuracy indicated by the `prediction_accuracy` variable.
# robustness_3.RDS
Example 1 with varying levels of skewness in the classified variable. The variable `Px` is the baserate of $X$ and controls the skewness of $X$.
It probably makes more sense to report the mean of $X$ instead of `Px` in the supplement.
# robustness_3_dv.RDS
Example 3 with varying levels of skewness in the classified variable. The variable `B0` is the intercept of the main model and controls the skewness of $Y$.
It probably makes more sense to report the mean of $Y$ instead of B0 in the supplement.
# robustness_4.RDS
Example 2 with varying amounts of differential error. The variable `y_bias` controls the amount of differential error.
It probably makes more sense to report the correlation between $Y$ and $X - W$ (the classification error), or the difference in accuracy between when $Y=1$ and when $Y=0$, in the supplement instead of `y_bias`.
# robustness_4_dv.RDS
Example 4 with varying amounts of bias. The variable `z_bias` controls the amount of differential error.
It probably makes more sense to report the correlation between $Z$ and $Y - W$, or the difference in accuracy between when $Z=1$ and when $Z=0$, in the supplement instead of `z_bias`.

View File

@@ -0,0 +1,17 @@
#!/bin/bash
# Generic Slurm submission wrapper: the command to run is passed as the
# script's arguments (e.g. `sbatch this.sh Rscript 01_simulation.R ...`).
# NOTE: the #SBATCH directives below must stay contiguous at the top —
# Slurm stops parsing them at the first non-comment line.
#SBATCH --job-name="simulate measurement error models"
## Allocation Definition
#SBATCH --account=comdata-ckpt
#SBATCH --partition=ckpt
## Resources
#SBATCH --nodes=1
## Walltime (4 hours)
#SBATCH --time=4:00:00
## Memory per node
#SBATCH --mem=4G
#SBATCH --ntasks-per-node=1
# Run from the simulations directory; %A_%a names logs by array job/task id.
#SBATCH --chdir /gscratch/comdata/users/nathante/ml_measurement_error_public/simulations
#SBATCH --output=simulation_jobs/%A_%a.out
#SBATCH --error=simulation_jobs/%A_%a.err
# Echo the command into the log for provenance, then execute it verbatim.
echo "$@"
"$@"

View File

@@ -89,7 +89,7 @@ my.mle <- function(df){
return(mlefit) return(mlefit)
} }
run_simulation_depvar <- function(df, result, outcome_formula=y~x+z, proxy_formula=w_pred~y){ run_simulation_depvar <- function(df, result, outcome_formula=y~x+z, proxy_formula=w_pred~y, confint_method='quad'){
(accuracy <- df[,mean(w_pred==y)]) (accuracy <- df[,mean(w_pred==y)])
result <- append(result, list(accuracy=accuracy)) result <- append(result, list(accuracy=accuracy))
@@ -150,11 +150,13 @@ run_simulation_depvar <- function(df, result, outcome_formula=y~x+z, proxy_formu
temp.df <- copy(df) temp.df <- copy(df)
temp.df[,y:=y.obs] temp.df[,y:=y.obs]
mod.caroll.lik <- measerr_mle_dv(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula) mod.caroll.lik <- measerr_mle_dv(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula)
fisher.info <- solve(mod.caroll.lik$hessian) fischer.info <- solve(mod.caroll.lik$hessian)
coef <- mod.caroll.lik$par coef <- mod.caroll.lik$par
ci.upper <- coef + sqrt(diag(fisher.info)) * 1.96 ci.upper <- coef + sqrt(diag(fischer.info)) * 1.96
ci.lower <- coef - sqrt(diag(fisher.info)) * 1.96 ci.lower <- coef - sqrt(diag(fischer.info)) * 1.96
result <- append(result, result <- append(result,
list(Bxy.est.mle = coef['x'], list(Bxy.est.mle = coef['x'],
Bxy.ci.upper.mle = ci.upper['x'], Bxy.ci.upper.mle = ci.upper['x'],
@@ -163,6 +165,19 @@ run_simulation_depvar <- function(df, result, outcome_formula=y~x+z, proxy_formu
Bzy.ci.upper.mle = ci.upper['z'], Bzy.ci.upper.mle = ci.upper['z'],
Bzy.ci.lower.mle = ci.lower['z'])) Bzy.ci.lower.mle = ci.lower['z']))
mod.caroll.profile.lik <- measerr_mle_dv(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula, method='bbmle')
coef <- coef(mod.caroll.profile.lik)
ci <- confint(mod.caroll.profile.lik, method='spline')
ci.lower <- ci[,'2.5 %']
ci.upper <- ci[,'97.5 %']
result <- append(result,
list(Bxy.est.mle.profile = coef['x'],
Bxy.ci.upper.mle.profile = ci.upper['x'],
Bxy.ci.lower.mle.profile = ci.lower['x'],
Bzy.est.mle.profile = coef['z'],
Bzy.ci.upper.mle.profile = ci.upper['z'],
Bzy.ci.lower.mle.profile = ci.lower['z']))
## my implementatoin of liklihood based correction ## my implementatoin of liklihood based correction
mod.zhang <- zhang.mle.dv(df) mod.zhang <- zhang.mle.dv(df)
@@ -180,26 +195,35 @@ run_simulation_depvar <- function(df, result, outcome_formula=y~x+z, proxy_formu
# amelia says use normal distribution for binary variables. # amelia says use normal distribution for binary variables.
amelia_result <- list(Bxy.est.amelia.full = NA,
Bxy.ci.upper.amelia.full = NA,
Bxy.ci.lower.amelia.full = NA,
Bzy.est.amelia.full = NA,
Bzy.ci.upper.amelia.full = NA,
Bzy.ci.lower.amelia.full = NA
)
amelia.out.k <- amelia(df, m=200, p2s=0, idvars=c('y','ystar','w')) tryCatch({
mod.amelia.k <- zelig(y.obs~x+z, model='ls', data=amelia.out.k$imputations, cite=FALSE) amelia.out.k <- amelia(df, m=200, p2s=0, idvars=c('y','ystar','w'),ords="y.obs")
(coefse <- combine_coef_se(mod.amelia.k, messages=FALSE)) mod.amelia.k <- zelig(y.obs~x+z, model='logit', data=amelia.out.k$imputations, cite=FALSE)
est.x.mi <- coefse['x','Estimate'] (coefse <- combine_coef_se(mod.amelia.k, messages=FALSE))
est.x.se <- coefse['x','Std.Error'] est.x.mi <- coefse['x','Estimate']
result <- append(result, est.x.se <- coefse['x','Std.Error']
list(Bxy.est.amelia.full = est.x.mi,
est.z.mi <- coefse['z','Estimate']
est.z.se <- coefse['z','Std.Error']
amelia_result <- list(Bxy.est.amelia.full = est.x.mi,
Bxy.ci.upper.amelia.full = est.x.mi + 1.96 * est.x.se, Bxy.ci.upper.amelia.full = est.x.mi + 1.96 * est.x.se,
Bxy.ci.lower.amelia.full = est.x.mi - 1.96 * est.x.se Bxy.ci.lower.amelia.full = est.x.mi - 1.96 * est.x.se,
)) Bzy.est.amelia.full = est.z.mi,
est.z.mi <- coefse['z','Estimate']
est.z.se <- coefse['z','Std.Error']
result <- append(result,
list(Bzy.est.amelia.full = est.z.mi,
Bzy.ci.upper.amelia.full = est.z.mi + 1.96 * est.z.se, Bzy.ci.upper.amelia.full = est.z.mi + 1.96 * est.z.se,
Bzy.ci.lower.amelia.full = est.z.mi - 1.96 * est.z.se Bzy.ci.lower.amelia.full = est.z.mi - 1.96 * est.z.se
)) )
},
error = function(e){
result[['error']] <- e}
)
result <- append(result,amelia_result)
return(result) return(result)
@@ -207,7 +231,7 @@ run_simulation_depvar <- function(df, result, outcome_formula=y~x+z, proxy_formu
## outcome_formula, proxy_formula, and truth_formula are passed to measerr_mle ## outcome_formula, proxy_formula, and truth_formula are passed to measerr_mle
run_simulation <- function(df, result, outcome_formula=y~x+z, proxy_formula=NULL, truth_formula=NULL){ run_simulation <- function(df, result, outcome_formula=y~x+z, proxy_formula=NULL, truth_formula=NULL, confint_method='quad'){
accuracy <- df[,mean(w_pred==x)] accuracy <- df[,mean(w_pred==x)]
accuracy.y0 <- df[y<=0,mean(w_pred==x)] accuracy.y0 <- df[y<=0,mean(w_pred==x)]
@@ -272,57 +296,119 @@ run_simulation <- function(df, result, outcome_formula=y~x+z, proxy_formula=NUL
Bzy.ci.upper.naive = naive.ci.Bzy[2], Bzy.ci.upper.naive = naive.ci.Bzy[2],
Bzy.ci.lower.naive = naive.ci.Bzy[1])) Bzy.ci.lower.naive = naive.ci.Bzy[1]))
amelia_result <- list(
Bxy.est.amelia.full = NULL,
Bxy.ci.upper.amelia.full = NULL,
Bxy.ci.lower.amelia.full = NULL,
Bzy.est.amelia.full = NULL,
Bzy.ci.upper.amelia.full = NULL,
Bzy.ci.lower.amelia.full = NULL
)
tryCatch({
amelia.out.k <- amelia(df, m=200, p2s=0, idvars=c('x','w'))
mod.amelia.k <- zelig(y~x.obs+z, model='ls', data=amelia.out.k$imputations, cite=FALSE)
(coefse <- combine_coef_se(mod.amelia.k))
est.x.mi <- coefse['x.obs','Estimate']
est.x.se <- coefse['x.obs','Std.Error']
est.z.mi <- coefse['z','Estimate']
est.z.se <- coefse['z','Std.Error']
amelia_result <- list(Bxy.est.amelia.full = est.x.mi,
Bxy.ci.upper.amelia.full = est.x.mi + 1.96 * est.x.se,
Bxy.ci.lower.amelia.full = est.x.mi - 1.96 * est.x.se,
Bzy.est.amelia.full = est.z.mi,
Bzy.ci.upper.amelia.full = est.z.mi + 1.96 * est.z.se,
Bzy.ci.lower.amelia.full = est.z.mi - 1.96 * est.z.se
)
},
error = function(e){
result[['error']] <- e}
)
amelia.out.k <- amelia(df, m=200, p2s=0, idvars=c('x','w')) result <- append(result, amelia_result)
mod.amelia.k <- zelig(y~x.obs+z, model='ls', data=amelia.out.k$imputations, cite=FALSE)
(coefse <- combine_coef_se(mod.amelia.k, messages=FALSE))
est.x.mi <- coefse['x.obs','Estimate']
est.x.se <- coefse['x.obs','Std.Error']
result <- append(result,
list(Bxy.est.amelia.full = est.x.mi,
Bxy.ci.upper.amelia.full = est.x.mi + 1.96 * est.x.se,
Bxy.ci.lower.amelia.full = est.x.mi - 1.96 * est.x.se
))
est.z.mi <- coefse['z','Estimate']
est.z.se <- coefse['z','Std.Error']
result <- append(result,
list(Bzy.est.amelia.full = est.z.mi,
Bzy.ci.upper.amelia.full = est.z.mi + 1.96 * est.z.se,
Bzy.ci.lower.amelia.full = est.z.mi - 1.96 * est.z.se
))
mle_result <- list(Bxy.est.mle = NULL,
Bxy.ci.upper.mle = NULL,
Bxy.ci.lower.mle = NULL,
Bzy.est.mle = NULL,
Bzy.ci.upper.mle = NULL,
Bzy.ci.lower.mle = NULL)
tryCatch({
temp.df <- copy(df) temp.df <- copy(df)
temp.df <- temp.df[,x:=x.obs] temp.df <- temp.df[,x:=x.obs]
mod.caroll.lik <- measerr_mle(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula, truth_formula=truth_formula) mod.caroll.lik <- measerr_mle(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula, truth_formula=truth_formula, method='optim')
fisher.info <- solve(mod.caroll.lik$hessian) fischer.info <- solve(mod.caroll.lik$hessian)
coef <- mod.caroll.lik$par coef <- mod.caroll.lik$par
ci.upper <- coef + sqrt(diag(fisher.info)) * 1.96 ci.upper <- coef + sqrt(diag(fischer.info)) * 1.96
ci.lower <- coef - sqrt(diag(fisher.info)) * 1.96 ci.lower <- coef - sqrt(diag(fischer.info)) * 1.96
mle_result <- list(Bxy.est.mle = coef['x'],
Bxy.ci.upper.mle = ci.upper['x'],
Bxy.ci.lower.mle = ci.lower['x'],
Bzy.est.mle = coef['z'],
Bzy.ci.upper.mle = ci.upper['z'],
Bzy.ci.lower.mle = ci.lower['z'])
},
error=function(e) {result[['error']] <- as.character(e)
})
result <- append(result, result <- append(result, mle_result)
list(Bxy.est.mle = coef['x'], mle_result_proflik <- list(Bxy.est.mle.profile = NULL,
Bxy.ci.upper.mle = ci.upper['x'], Bxy.ci.upper.mle.profile = NULL,
Bxy.ci.lower.mle = ci.lower['x'], Bxy.ci.lower.mle.profile = NULL,
Bzy.est.mle = coef['z'], Bzy.est.mle.profile = NULL,
Bzy.ci.upper.mle = ci.upper['z'], Bzy.ci.upper.mle.profile = NULL,
Bzy.ci.lower.mle = ci.lower['z'])) Bzy.ci.lower.mle.profile = NULL)
mod.zhang.lik <- zhang.mle.iv(df) tryCatch({
coef <- coef(mod.zhang.lik) ## confint_method == 'bbmle'
ci <- confint(mod.zhang.lik,method='quad') mod.caroll.lik <- measerr_mle(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula, truth_formula=truth_formula, method='bbmle')
result <- append(result, coef <- coef(mod.caroll.lik)
list(Bxy.est.zhang = coef['Bxy'], ci <- confint(mod.caroll.lik, method='spline')
Bxy.ci.upper.zhang = ci['Bxy','97.5 %'], ci.lower <- ci[,'2.5 %']
Bxy.ci.lower.zhang = ci['Bxy','2.5 %'], ci.upper <- ci[,'97.5 %']
Bzy.est.zhang = coef['Bzy'],
Bzy.ci.upper.zhang = ci['Bzy','97.5 %'], mle_result_proflik <- list(Bxy.est.mle.profile = coef['x'],
Bzy.ci.lower.zhang = ci['Bzy','2.5 %'])) Bxy.ci.upper.mle.profile = ci.upper['x'],
Bxy.ci.lower.mle.profile = ci.lower['x'],
Bzy.est.mle.profile = coef['z'],
Bzy.ci.upper.mle.profile = ci.upper['z'],
Bzy.ci.lower.mle.profile = ci.lower['z'])
},
error=function(e) {result[['error']] <- as.character(e)
})
result <- append(result, mle_result_proflik)
zhang_result <- list(Bxy.est.mle.zhang = NULL,
Bxy.ci.upper.mle.zhang = NULL,
Bxy.ci.lower.mle.zhang = NULL,
Bzy.est.mle.zhang = NULL,
Bzy.ci.upper.mle.zhang = NULL,
Bzy.ci.lower.mle.zhang = NULL)
tryCatch({
mod.zhang.lik <- zhang.mle.iv(df)
coef <- coef(mod.zhang.lik)
ci <- confint(mod.zhang.lik,method='quad')
zhang_result <- list(Bxy.est.zhang = coef['Bxy'],
Bxy.ci.upper.zhang = ci['Bxy','97.5 %'],
Bxy.ci.lower.zhang = ci['Bxy','2.5 %'],
Bzy.est.zhang = coef['Bzy'],
Bzy.ci.upper.zhang = ci['Bzy','97.5 %'],
Bzy.ci.lower.zhang = ci['Bzy','2.5 %'])
},
error=function(e) {result[['error']] <- as.character(e)
})
result <- append(result, zhang_result)
## What if we can't observe k -- most realistic scenario. We can't include all the ML features in a model. ## What if we can't observe k -- most realistic scenario. We can't include all the ML features in a model.
## amelia.out.nok <- amelia(df, m=200, p2s=0, idvars=c("x","w_pred"), noms=noms) ## amelia.out.nok <- amelia(df, m=200, p2s=0, idvars=c("x","w_pred"), noms=noms)

View File

@@ -1,18 +1,23 @@
library(ggdist)
summarize.estimator <- function(df, suffix='naive', coefname='x'){ summarize.estimator <- function(sims.df, suffix='naive', coefname='x'){
part <- df[,c('N', reported_vars <- c(
'm', 'Bxy',
'Bxy', paste0('B',coefname,'y.est.',suffix),
paste0('B',coefname,'y.est.',suffix), paste0('B',coefname,'y.ci.lower.',suffix),
paste0('B',coefname,'y.ci.lower.',suffix), paste0('B',coefname,'y.ci.upper.',suffix)
paste0('B',coefname,'y.ci.upper.',suffix), )
'y_explained_variance',
'Bzx',
'Bzy', grouping_vars <- c('N','m','B0', 'Bxy', 'Bzy', 'Bzx', 'Px', 'Py','y_explained_variance', 'prediction_accuracy','outcome_formula','proxy_formula','truth_formula','z_bias','y_bias')
'accuracy_imbalance_difference'
), grouping_vars <- grouping_vars[grouping_vars %in% names(df)]
with=FALSE]
part <- sims.df[,
unique(c(reported_vars,
grouping_vars)),
with=FALSE]
true.in.ci <- as.integer((part$Bxy >= part[[paste0('B',coefname,'y.ci.lower.',suffix)]]) & (part$Bxy <= part[[paste0('B',coefname,'y.ci.upper.',suffix)]])) true.in.ci <- as.integer((part$Bxy >= part[[paste0('B',coefname,'y.ci.lower.',suffix)]]) & (part$Bxy <= part[[paste0('B',coefname,'y.ci.upper.',suffix)]]))
@@ -25,14 +30,17 @@ summarize.estimator <- function(df, suffix='naive', coefname='x'){
bias=bias, bias=bias,
sign.correct =sign.correct)] sign.correct =sign.correct)]
part.plot <- part[, .(p.true.in.ci = mean(true.in.ci), part.plot <- part[, .(p.true.in.ci = mean(true.in.ci),
mean.bias = mean(bias), mean.bias = mean(bias),
mean.est = mean(.SD[[paste0('B',coefname,'y.est.',suffix)]]), mean.est = mean(.SD[[paste0('B',coefname,'y.est.',suffix)]],na.rm=T),
var.est = var(.SD[[paste0('B',coefname,'y.est.',suffix)]]), var.est = var(.SD[[paste0('B',coefname,'y.est.',suffix)]],na.rm=T),
est.upper.95 = quantile(.SD[[paste0('B',coefname,'y.est.',suffix)]],0.975,na.rm=T), est.upper.95 = quantile(.SD[[paste0('B',coefname,'y.est.',suffix)]],0.975,na.rm=T),
est.lower.95 = quantile(.SD[[paste0('B',coefname,'y.est.',suffix)]],0.025,na.rm=T), est.lower.95 = quantile(.SD[[paste0('B',coefname,'y.est.',suffix)]],0.025,na.rm=T),
mean.ci.upper = mean(.SD[[paste0('B',coefname,'y.ci.upper.',suffix)]],na.rm=T), mean.ci.upper = mean(.SD[[paste0('B',coefname,'y.ci.upper.',suffix)]],na.rm=T),
mean.ci.lower = mean(.SD[[paste0('B',coefname,'y.ci.lower.',suffix)]],na.rm=T), mean.ci.lower = mean(.SD[[paste0('B',coefname,'y.ci.lower.',suffix)]],na.rm=T),
median.ci.upper = median(.SD[[paste0('B',coefname,'y.ci.upper.',suffix)]],na.rm=T),
median.ci.lower = median(.SD[[paste0('B',coefname,'y.ci.lower.',suffix)]],na.rm=T),
ci.upper.975 = quantile(.SD[[paste0('B',coefname,'y.ci.upper.',suffix)]],0.975,na.rm=T), ci.upper.975 = quantile(.SD[[paste0('B',coefname,'y.ci.upper.',suffix)]],0.975,na.rm=T),
ci.upper.025 = quantile(.SD[[paste0('B',coefname,'y.ci.upper.',suffix)]],0.025,na.rm=T), ci.upper.025 = quantile(.SD[[paste0('B',coefname,'y.ci.upper.',suffix)]],0.025,na.rm=T),
ci.lower.975 = quantile(.SD[[paste0('B',coefname,'y.ci.lower.',suffix)]],0.975,na.rm=T), ci.lower.975 = quantile(.SD[[paste0('B',coefname,'y.ci.lower.',suffix)]],0.975,na.rm=T),
@@ -43,7 +51,7 @@ summarize.estimator <- function(df, suffix='naive', coefname='x'){
variable=coefname, variable=coefname,
method=suffix method=suffix
), ),
by=c("N","m",'y_explained_variance','Bzx', 'Bzy', 'accuracy_imbalance_difference') by=grouping_vars,
] ]
return(part.plot) return(part.plot)