Added, but didn't test the remaining robustness checks.
This commit is contained in:
parent
8ac33c14d7
commit
d8bc08f18f
@ -30,11 +30,11 @@ source("simulation_base.R")
|
||||
#### how much power do we get from the model in the first place? (sweeping N and m)
|
||||
####
|
||||
|
||||
simulate_data <- function(N, m, B0=0, Bxy=0.2, Bzy=-0.2, Bzx=0.2, y_explained_variance=0.025, prediction_accuracy=0.73, seed=1){
|
||||
simulate_data <- function(N, m, B0=0, Bxy=0.2, Bzy=-0.2, Bzx=0.2, y_explained_variance=0.025, prediction_accuracy=0.73, Px=0.5, seed=1){
|
||||
set.seed(seed)
|
||||
z <- rnorm(N,sd=0.5)
|
||||
# x.var.epsilon <- var(Bzx *z) * ((1-zx_explained_variance)/zx_explained_variance)
|
||||
xprime <- Bzx * z #+ x.var.epsilon
|
||||
xprime <- Bzx * z + qlogis(Px)
|
||||
x <- rbinom(N,1,plogis(xprime))
|
||||
|
||||
y.var.epsilon <- (var(Bzy * z) + var(Bxy *x) + 2*cov(Bxy*x,Bzy*z)) * ((1-y_explained_variance)/y_explained_variance)
|
||||
@ -78,16 +78,18 @@ parser <- add_argument(parser, "--truth_formula", help='formula for the true var
|
||||
parser <- add_argument(parser, "--Bzx", help='Effect of z on x', default=0.3)
|
||||
parser <- add_argument(parser, "--Bzy", help='Effect of z on y', default=-0.3)
|
||||
parser <- add_argument(parser, "--Bxy", help='Effect of x on y', default=0.3)
|
||||
parser <- add_argument(parser, "--Px", help='Base rate of x', default=0.5)
|
||||
|
||||
args <- parse_args(parser)
|
||||
B0 <- 0
|
||||
Px <- args$Px
|
||||
Bxy <- args$Bxy
|
||||
Bzy <- args$Bzy
|
||||
Bzx <- args$Bzx
|
||||
|
||||
df <- simulate_data(args$N, args$m, B0, Bxy, Bzy, Bzx, seed=args$seed + 500, y_explained_variance = args$y_explained_variance, prediction_accuracy=args$prediction_accuracy)
|
||||
df <- simulate_data(args$N, args$m, B0, Bxy, Bzy, Bzx, Px, seed=args$seed + 500, y_explained_variance = args$y_explained_variance, prediction_accuracy=args$prediction_accuracy)
|
||||
|
||||
result <- list('N'=args$N,'m'=args$m,'B0'=B0,'Bxy'=Bxy, Bzx=Bzx, 'Bzy'=Bzy, 'seed'=args$seed, 'y_explained_variance'=args$y_explained_variance, 'prediction_accuracy'=args$prediction_accuracy, 'accuracy_imbalance_difference'=args$accuracy_imbalance_difference, 'outcome_formula'=args$outcome_formula, 'proxy_formula'=args$proxy_formula,truth_formula=args$truth_formula, error='')
|
||||
result <- list('N'=args$N,'m'=args$m,'B0'=B0,'Bxy'=Bxy, Bzx=Bzx, 'Bzy'=Bzy, 'Px'=Px, 'seed'=args$seed, 'y_explained_variance'=args$y_explained_variance, 'prediction_accuracy'=args$prediction_accuracy, 'accuracy_imbalance_difference'=args$accuracy_imbalance_difference, 'outcome_formula'=args$outcome_formula, 'proxy_formula'=args$proxy_formula,truth_formula=args$truth_formula, error='')
|
||||
|
||||
outline <- run_simulation(df, result, outcome_formula=as.formula(args$outcome_formula), proxy_formula=as.formula(args$proxy_formula), truth_formula=as.formula(args$truth_formula))
|
||||
|
||||
|
@ -31,11 +31,11 @@ source("simulation_base.R")
|
||||
|
||||
## one way to do it is by adding correlation to x.obs and y that isn't in w.
|
||||
## in other words, the model is missing an important feature of x.obs that's related to y.
|
||||
simulate_data <- function(N, m, B0, Bxy, Bzx, Bzy, seed, y_explained_variance=0.025, prediction_accuracy=0.73, y_bias=-0.8,z_bias=0,accuracy_imbalance_difference=0.3){
|
||||
simulate_data <- function(N, m, B0, Bxy, Bzx, Bzy, seed, y_explained_variance=0.025, prediction_accuracy=0.73, y_bias=-0.8,z_bias=0,Px=0.5,accuracy_imbalance_difference=0.3){
|
||||
set.seed(seed)
|
||||
# make w and y dependent
|
||||
z <- rnorm(N,sd=0.5)
|
||||
x <- rbinom(N, 1, plogis(Bzx * z))
|
||||
x <- rbinom(N, 1, plogis(Bzx * z + qlogis(Px)))
|
||||
|
||||
y.var.epsilon <- (var(Bzy * z) + var(Bxy *x) + 2*cov(Bzy*z,Bxy*x)) * ((1-y_explained_variance)/y_explained_variance)
|
||||
y.epsilon <- rnorm(N, sd = sqrt(y.var.epsilon))
|
||||
@ -140,10 +140,12 @@ parser <- add_argument(parser, "--proxy_formula", help='formula for the proxy va
|
||||
parser <- add_argument(parser, "--y_bias", help='coefficient of y on the probability a classification is correct', default=-0.5)
|
||||
parser <- add_argument(parser, "--z_bias", help='coefficient of z on the probability a classification is correct', default=0)
|
||||
parser <- add_argument(parser, "--truth_formula", help='formula for the true variable', default="x~z")
|
||||
parser <- add_argument(parser, "--Px", help='base rate of x', default=0.5)
|
||||
|
||||
args <- parse_args(parser)
|
||||
|
||||
B0 <- 0
|
||||
Px <- args$Px
|
||||
Bxy <- args$Bxy
|
||||
Bzy <- args$Bzy
|
||||
Bzx <- args$Bzx
|
||||
@ -157,7 +159,7 @@ if(args$m < args$N){
|
||||
## pc.df <- pc(suffStat=list(C=cor(df.pc),n=nrow(df.pc)),indepTest=gaussCItest,labels=names(df.pc),alpha=0.05)
|
||||
## plot(pc.df)
|
||||
|
||||
result <- list('N'=args$N,'m'=args$m,'B0'=B0,'Bxy'=Bxy, Bzx=args$Bzx, 'Bzy'=Bzy, 'seed'=args$seed, 'y_explained_variance'=args$y_explained_variance, 'prediction_accuracy'=args$prediction_accuracy, 'accuracy_imbalance_difference'=args$accuracy_imbalance_difference, 'y_bias'=args$y_bias,'outcome_formula'=args$outcome_formula, 'proxy_formula'=args$proxy_formula,truth_formula=args$truth_formula, error='')
|
||||
## parameters of this run, stored alongside the estimates; Px is the base rate of x
result <- list('N'=args$N,'m'=args$m,'B0'=B0,'Bxy'=Bxy, 'Bzx'=args$Bzx, 'Bzy'=Bzy, 'Px'=Px, 'seed'=args$seed, 'y_explained_variance'=args$y_explained_variance, 'prediction_accuracy'=args$prediction_accuracy, 'accuracy_imbalance_difference'=args$accuracy_imbalance_difference, 'y_bias'=args$y_bias,'outcome_formula'=args$outcome_formula, 'proxy_formula'=args$proxy_formula,truth_formula=args$truth_formula, error='')
|
||||
|
||||
outline <- run_simulation(df, result, outcome_formula=as.formula(args$outcome_formula), proxy_formula=as.formula(args$proxy_formula), truth_formula=as.formula(args$truth_formula))
|
||||
|
||||
|
@ -76,12 +76,13 @@ parser <- add_argument(parser, "--prediction_accuracy", help='how accurate is th
|
||||
parser <- add_argument(parser, "--Bxy", help='coefficient of x on y', default=0.01)
|
||||
parser <- add_argument(parser, "--Bzy", help='coeffficient of z on y', default=-0.01)
|
||||
parser <- add_argument(parser, "--Bzx", help='coeffficient of z on x', default=-0.5)
|
||||
parser <- add_argument(parser, "--B0", help='Base rate of y', default=0.5)
|
||||
parser <- add_argument(parser, "--outcome_formula", help='formula for the outcome variable', default="y~x+z")
|
||||
parser <- add_argument(parser, "--proxy_formula", help='formula for the proxy variable', default="w_pred~y")
|
||||
|
||||
args <- parse_args(parser)
|
||||
|
||||
B0 <- 0
|
||||
B0 <- args$B0
|
||||
Bxy <- args$Bxy
|
||||
Bzy <- args$Bzy
|
||||
Bzx <- args$Bzx
|
||||
|
@ -31,14 +31,14 @@ source("simulation_base.R")
|
||||
|
||||
## one way to do it is by adding correlation to x.obs and y that isn't in w.
|
||||
## in other words, the model is missing an important feature of x.obs that's related to y.
|
||||
simulate_data <- function(N, m, B0, Bxy, Bzy, seed, prediction_accuracy=0.73, x_bias=-0.75){
|
||||
simulate_data <- function(N, m, B0, Bxy, Bzy, Py, seed, prediction_accuracy=0.73, x_bias=-0.75){
|
||||
set.seed(seed)
|
||||
|
||||
# make w and y dependent
|
||||
z <- rbinom(N, 1, 0.5)
|
||||
x <- rbinom(N, 1, 0.5)
|
||||
|
||||
ystar <- Bzy * z + Bxy * x + B0
|
||||
## shift the linear predictor by the logit of the base rate Py (qlogis is the logit function)
ystar <- Bzy * z + Bxy * x + B0 + qlogis(Py)
|
||||
y <- rbinom(N,1,plogis(ystar))
|
||||
|
||||
# glm(y ~ x + z, family="binomial")
|
||||
@ -77,6 +77,7 @@ parser <- add_argument(parser, "--prediction_accuracy", help='how accurate is th
|
||||
parser <- add_argument(parser, "--x_bias", help='how is the classifier biased?', default=0.75)
|
||||
parser <- add_argument(parser, "--Bxy", help='coefficient of x on y', default=0.3)
|
||||
parser <- add_argument(parser, "--Bzy", help='coeffficient of z on y', default=-0.3)
|
||||
parser <- add_argument(parser, "--Py", help='Base rate of y', default=0.5)
|
||||
parser <- add_argument(parser, "--outcome_formula", help='formula for the outcome variable', default="y~x+z")
|
||||
parser <- add_argument(parser, "--proxy_formula", help='formula for the proxy variable', default="w_pred~y*x")
|
||||
|
||||
|
@ -76,12 +76,13 @@ parser <- add_argument(parser, "--prediction_accuracy", help='how accurate is th
|
||||
parser <- add_argument(parser, "--z_bias", help='how is the classifier biased?', default=1.5)
|
||||
parser <- add_argument(parser, "--Bxy", help='coefficient of x on y', default=0.1)
|
||||
parser <- add_argument(parser, "--Bzy", help='coeffficient of z on y', default=-0.1)
|
||||
## B0 is read below as the intercept term, not as a z coefficient
parser <- add_argument(parser, "--B0", help='intercept of the model for y', default=-0.1)
|
||||
parser <- add_argument(parser, "--outcome_formula", help='formula for the outcome variable', default="y~x+z")
|
||||
parser <- add_argument(parser, "--proxy_formula", help='formula for the proxy variable', default="w_pred~y+z")
|
||||
|
||||
args <- parse_args(parser)
|
||||
|
||||
B0 <- 0
|
||||
B0 <- args$B0
|
||||
Bxy <- args$Bxy
|
||||
Bzy <- args$Bzy
|
||||
|
||||
|
@ -150,19 +150,202 @@ robustness_1_dv.RDS: robustness_1_dv.feather
|
||||
${srun} Rscript plot_dv_example.R --infile $< --name "robustness_1_dv" --outfile $@
|
||||
|
||||
|
||||
robustness_2_jobs: grid_sweep.py 01_two_covariates.R simulation_base.R grid_sweep.py
|
||||
robustness_2_jobs_p1: grid_sweep.py 01_two_covariates.R simulation_base.R grid_sweep.py
|
||||
rm -f $@
|
||||
${srun} $< --command 'Rscript 01_two_covariates.R' --arg_dict '{"N":${Ns},"m":${ms}, "seed":${seeds}, "outfile":["robustness_2.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[0.3], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y+x"], "truth_formula":["x~z"], "prediction_accuracy":[0.6,0.73,0.8,0.85,0.9,0.95]}' --outfile $@
|
||||
${srun} $< --command 'Rscript 01_two_covariates.R' --arg_dict '{"N":${Ns},"m":${ms}, "seed":${seeds}, "outfile":["robustness_2.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[0.3], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y+x"], "truth_formula":["x~z"], "prediction_accuracy":[0.60,0.65]}' --outfile $@
|
||||
|
||||
robustness_2_jobs_p2: grid_sweep.py 01_two_covariates.R simulation_base.R grid_sweep.py
|
||||
rm -f $@
|
||||
${srun} $< --command 'Rscript 01_two_covariates.R' --arg_dict '{"N":${Ns},"m":${ms}, "seed":${seeds}, "outfile":["robustness_2.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[0.3], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y+x"], "truth_formula":["x~z"], "prediction_accuracy":[0.70,0.75]}' --outfile $@
|
||||
|
||||
robustness_2_jobs_p3: grid_sweep.py 01_two_covariates.R simulation_base.R grid_sweep.py
|
||||
rm -f $@
|
||||
${srun} $< --command 'Rscript 01_two_covariates.R' --arg_dict '{"N":${Ns},"m":${ms}, "seed":${seeds}, "outfile":["robustness_2.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[0.3], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y+x"], "truth_formula":["x~z"], "prediction_accuracy":[0.80,0.85]}' --outfile $@
|
||||
|
||||
START=1
|
||||
END=$(shell cat robustness_2_jobs | wc -l)
|
||||
robustness_2_jobs_p4: grid_sweep.py 01_two_covariates.R simulation_base.R grid_sweep.py
|
||||
rm -f $@
|
||||
${srun} $< --command 'Rscript 01_two_covariates.R' --arg_dict '{"N":${Ns},"m":${ms}, "seed":${seeds}, "outfile":["robustness_2.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[0.3], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y+x"], "truth_formula":["x~z"], "prediction_accuracy":[0.90,0.95]}' --outfile $@
|
||||
|
||||
START=0
|
||||
END_1=$(shell cat robustness_2_jobs_p1 | wc -l)
|
||||
END_2=$(shell cat robustness_2_jobs_p2 | wc -l)
|
||||
END_3=$(shell cat robustness_2_jobs_p3 | wc -l)
|
||||
END_4=$(shell cat robustness_2_jobs_p4 | wc -l)
|
||||
STEP=1000
|
||||
ITEMS=$(shell seq $(START) $(STEP) $(END))
|
||||
ONE=1
|
||||
ITEMS_1=$(shell seq $(START) $(STEP) $(END_1))
|
||||
ITEMS_2=$(shell seq $(START) $(STEP) $(END_2))
|
||||
ITEMS_3=$(shell seq $(START) $(STEP) $(END_3))
|
||||
ITEMS_4=$(shell seq $(START) $(STEP) $(END_4))
|
||||
|
||||
robustness_2.feather: robustness_2_jobs
|
||||
$(foreach item,$(ITEMS),sbatch --wait --verbose --array=$(shell expr $(item))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 $<)
|
||||
robustness_2.feather: robustness_2_jobs_p1 robustness_2_jobs_p2 robustness_2_jobs_p3 robustness_2_jobs_p4
|
||||
$(foreach item,$(ITEMS_1),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_2_jobs_p1;)
|
||||
$(foreach item,$(ITEMS_2),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_2_jobs_p2;)
|
||||
$(foreach item,$(ITEMS_3),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_2_jobs_p3;)
|
||||
$(foreach item,$(ITEMS_4),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_2_jobs_p4;)
|
||||
|
||||
|
||||
robustness_2_dv_jobs_p1: grid_sweep.py 03_depvar.R simulation_base.R grid_sweep.py
|
||||
rm -f $@
|
||||
${srun} $< --command 'Rscript 03_depvar.R' --arg_dict '{"N":${Ns},"m":${ms}, "seed":${seeds}, "outfile":["robustness_2.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[0.3], "outcome_formula":["y~x+z"], "prediction_accuracy":[0.60,0.65]}' --outfile $@
|
||||
|
||||
robustness_2_dv_jobs_p2: grid_sweep.py 03_depvar.R simulation_base.R grid_sweep.py
|
||||
rm -f $@
|
||||
${srun} $< --command 'Rscript 03_depvar.R' --arg_dict '{"N":${Ns},"m":${ms}, "seed":${seeds}, "outfile":["robustness_2.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[0.3], "outcome_formula":["y~x+z"], "prediction_accuracy":[0.70,0.75]}' --outfile $@
|
||||
|
||||
robustness_2_dv_jobs_p3: grid_sweep.py 03_depvar.R simulation_base.R grid_sweep.py
|
||||
rm -f $@
|
||||
${srun} $< --command 'Rscript 03_depvar.R' --arg_dict '{"N":${Ns},"m":${ms}, "seed":${seeds}, "outfile":["robustness_2.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[0.3], "outcome_formula":["y~x+z"], "prediction_accuracy":[0.80,0.85]}' --outfile $@
|
||||
|
||||
robustness_2_dv_jobs_p4: grid_sweep.py 03_depvar.R simulation_base.R grid_sweep.py
|
||||
rm -f $@
|
||||
${srun} $< --command 'Rscript 03_depvar.R' --arg_dict '{"N":${Ns},"m":${ms}, "seed":${seeds}, "outfile":["robustness_2.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[0.3], "outcome_formula":["y~x+z"], "prediction_accuracy":[0.90,0.95]}' --outfile $@
|
||||
|
||||
START=0
|
||||
END_1=$(shell cat robustness_2_dv_jobs_p1 | wc -l)
|
||||
END_2=$(shell cat robustness_2_dv_jobs_p2 | wc -l)
|
||||
END_3=$(shell cat robustness_2_dv_jobs_p3 | wc -l)
|
||||
END_4=$(shell cat robustness_2_dv_jobs_p4 | wc -l)
|
||||
STEP=1000
|
||||
ONE=1
|
||||
ITEMS_1=$(shell seq $(START) $(STEP) $(END_1))
|
||||
ITEMS_2=$(shell seq $(START) $(STEP) $(END_2))
|
||||
ITEMS_3=$(shell seq $(START) $(STEP) $(END_3))
|
||||
ITEMS_4=$(shell seq $(START) $(STEP) $(END_4))
|
||||
|
||||
robustness_2_dv.feather: robustness_2_dv_jobs_p1 robustness_2_dv_jobs_p2 robustness_2_dv_jobs_p3 robustness_2_dv_jobs_p4
|
||||
$(foreach item,$(ITEMS_1),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_2_dv_jobs_p1;)
|
||||
$(foreach item,$(ITEMS_2),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_2_dv_jobs_p2;)
|
||||
$(foreach item,$(ITEMS_3),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_2_dv_jobs_p3;)
|
||||
$(foreach item,$(ITEMS_4),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_2_dv_jobs_p4;)
|
||||
|
||||
|
||||
|
||||
robustness_3_jobs_p1: grid_sweep.py 01_two_covariates.R simulation_base.R grid_sweep.py
|
||||
rm -f $@
|
||||
${srun} $< --command 'Rscript 01_two_covariates.R' --arg_dict '{"N":${Ns},"m":${ms}, "seed":${seeds}, "outfile":["robustness_3.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[0.3],"Px":[0.5,0.6], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y+x"], "truth_formula":["x~z"], "prediction_accuracy":[0.85]}' --outfile $@
|
||||
|
||||
robustness_3_jobs_p2: grid_sweep.py 01_two_covariates.R simulation_base.R grid_sweep.py
|
||||
rm -f $@
|
||||
${srun} $< --command 'Rscript 01_two_covariates.R' --arg_dict '{"N":${Ns},"m":${ms}, "seed":${seeds}, "outfile":["robustness_3.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[0.3],"Px":[0.7,0.8], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y+x"], "truth_formula":["x~z"], "prediction_accuracy":[0.85]}' --outfile $@
|
||||
|
||||
robustness_3_jobs_p3: grid_sweep.py 01_two_covariates.R simulation_base.R grid_sweep.py
|
||||
rm -f $@
|
||||
${srun} $< --command 'Rscript 01_two_covariates.R' --arg_dict '{"N":${Ns},"m":${ms}, "seed":${seeds}, "outfile":["robustness_3.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[0.3],"Px":[0.9,0.95], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y+x"], "truth_formula":["x~z"], "prediction_accuracy":[0.85]}' --outfile $@
|
||||
|
||||
START=0
|
||||
END_1=$(shell cat robustness_3_jobs_p1 | wc -l)
|
||||
END_2=$(shell cat robustness_3_jobs_p2 | wc -l)
|
||||
END_3=$(shell cat robustness_3_jobs_p3 | wc -l)
|
||||
|
||||
STEP=1000
|
||||
ONE=1
|
||||
ITEMS_1=$(shell seq $(START) $(STEP) $(END_1))
|
||||
ITEMS_2=$(shell seq $(START) $(STEP) $(END_2))
|
||||
ITEMS_3=$(shell seq $(START) $(STEP) $(END_3))
|
||||
|
||||
robustness_3.feather: robustness_3_jobs_p1 robustness_3_jobs_p2 robustness_3_jobs_p3
|
||||
$(foreach item,$(ITEMS_1),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_3_jobs_p1;)
|
||||
$(foreach item,$(ITEMS_2),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_3_jobs_p2;)
|
||||
$(foreach item,$(ITEMS_3),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_3_jobs_p3;)
|
||||
|
||||
|
||||
robustness_3_dv_jobs_p1: grid_sweep.py 03_depvar.R simulation_base.R grid_sweep.py
|
||||
rm -f $@
|
||||
${srun} $< --command 'Rscript 03_depvar.R' --arg_dict '{"N":${Ns},"m":${ms}, "seed":${seeds}, "outfile":["robustness_3.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[0.3],"B0":[0.5,0.6], "outcome_formula":["y~x+z"], "prediction_accuracy":[0.85]}' --outfile $@
|
||||
|
||||
robustness_3_dv_jobs_p2: grid_sweep.py 03_depvar.R simulation_base.R grid_sweep.py
|
||||
rm -f $@
|
||||
${srun} $< --command 'Rscript 03_depvar.R' --arg_dict '{"N":${Ns},"m":${ms}, "seed":${seeds}, "outfile":["robustness_3.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[0.3],"B0":[0.7,0.8], "outcome_formula":["y~x+z"], "prediction_accuracy":[0.85]}' --outfile $@
|
||||
|
||||
robustness_3_dv_jobs_p3: grid_sweep.py 03_depvar.R simulation_base.R grid_sweep.py
|
||||
rm -f $@
|
||||
${srun} $< --command 'Rscript 03_depvar.R' --arg_dict '{"N":${Ns},"m":${ms}, "seed":${seeds}, "outfile":["robustness_3.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[0.3], "B0":[0.9,0.95], "outcome_formula":["y~x+z"], "prediction_accuracy":[0.85]}' --outfile $@
|
||||
|
||||
START=0
|
||||
END_1=$(shell cat robustness_3_dv_jobs_p1 | wc -l)
|
||||
END_2=$(shell cat robustness_3_dv_jobs_p2 | wc -l)
|
||||
END_3=$(shell cat robustness_3_dv_jobs_p3 | wc -l)
|
||||
|
||||
STEP=1000
|
||||
ONE=1
|
||||
ITEMS_1=$(shell seq $(START) $(STEP) $(END_1))
|
||||
ITEMS_2=$(shell seq $(START) $(STEP) $(END_2))
|
||||
ITEMS_3=$(shell seq $(START) $(STEP) $(END_3))
|
||||
|
||||
robustness_3_dv.feather: robustness_3_dv_jobs_p1 robustness_3_dv_jobs_p2 robustness_3_dv_jobs_p3
|
||||
$(foreach item,$(ITEMS_1),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_3_dv_jobs_p1;)
|
||||
$(foreach item,$(ITEMS_2),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_3_dv_jobs_p2;)
|
||||
$(foreach item,$(ITEMS_3),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_3_dv_jobs_p3;)
|
||||
|
||||
|
||||
|
||||
robustness_4_jobs_p1: grid_sweep.py 02_indep_differential.R simulation_base.R grid_sweep.py
|
||||
rm -f $@
|
||||
${srun} $< --command 'Rscript 02_indep_differential.R' --arg_dict '{"N":${Ns},"m":${ms}, "seed":${seeds}, "outfile":["robustness_4.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[0.3], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y+x"], "truth_formula":["x~z"], "prediction_accuracy":[0.85], "y_bias":[-1,-0.85]}' --outfile $@
|
||||
|
||||
robustness_4_jobs_p2: grid_sweep.py 02_indep_differential.R simulation_base.R grid_sweep.py
|
||||
rm -f $@
|
||||
${srun} $< --command 'Rscript 02_indep_differential.R' --arg_dict '{"N":${Ns},"m":${ms}, "seed":${seeds}, "outfile":["robustness_4.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[0.3], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y+x"], "truth_formula":["x~z"], "prediction_accuracy":[0.85], "y_bias":[-0.70,-0.55]}' --outfile $@
|
||||
|
||||
robustness_4_jobs_p3: grid_sweep.py 02_indep_differential.R simulation_base.R grid_sweep.py
|
||||
rm -f $@
|
||||
${srun} $< --command 'Rscript 02_indep_differential.R' --arg_dict '{"N":${Ns},"m":${ms}, "seed":${seeds}, "outfile":["robustness_4.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[0.3], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y+x"], "truth_formula":["x~z"], "prediction_accuracy":[0.85], "y_bias":[-0.4,-0.25]}' --outfile $@
|
||||
|
||||
robustness_4_jobs_p4: grid_sweep.py 02_indep_differential.R simulation_base.R grid_sweep.py
|
||||
rm -f $@
|
||||
${srun} $< --command 'Rscript 02_indep_differential.R' --arg_dict '{"N":${Ns},"m":${ms}, "seed":${seeds}, "outfile":["robustness_4.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[0.3], "outcome_formula":["y~x+z"], "proxy_formula":["w_pred~y+x"], "truth_formula":["x~z"], "prediction_accuracy":[0.85], "y_bias":[-0.1,0]}' --outfile $@
|
||||
|
||||
START=0
|
||||
END_1=$(shell cat robustness_4_jobs_p1 | wc -l)
|
||||
END_2=$(shell cat robustness_4_jobs_p2 | wc -l)
|
||||
END_3=$(shell cat robustness_4_jobs_p3 | wc -l)
|
||||
# number of jobs in the fourth job file (p4)
END_4=$(shell cat robustness_4_jobs_p4 | wc -l)
|
||||
|
||||
STEP=1000
|
||||
ONE=1
|
||||
ITEMS_1=$(shell seq $(START) $(STEP) $(END_1))
|
||||
ITEMS_2=$(shell seq $(START) $(STEP) $(END_2))
|
||||
ITEMS_3=$(shell seq $(START) $(STEP) $(END_3))
|
||||
ITEMS_4=$(shell seq $(START) $(STEP) $(END_4))
|
||||
|
||||
robustness_4.feather: robustness_4_jobs_p1 robustness_4_jobs_p2 robustness_4_jobs_p3
|
||||
$(foreach item,$(ITEMS_1),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_4_jobs_p1;)
|
||||
$(foreach item,$(ITEMS_2),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_4_jobs_p2;)
|
||||
$(foreach item,$(ITEMS_3),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_4_jobs_p3;)
|
||||
|
||||
|
||||
robustness_4_dv_jobs_p1: grid_sweep.py 03_depvar.R simulation_base.R grid_sweep.py
|
||||
rm -f $@
|
||||
${srun} $< --command 'Rscript 03_depvar.R' --arg_dict '{"N":${Ns},"m":${ms}, "seed":${seeds}, "outfile":["robustness_4.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[0.3],"B0":[0.5], "outcome_formula":["y~x+z"], "prediction_accuracy":[0.85], "z_bias":[0,0.1]}' --outfile $@
|
||||
|
||||
robustness_4_dv_jobs_p2: grid_sweep.py 03_depvar.R simulation_base.R grid_sweep.py
|
||||
rm -f $@
|
||||
${srun} $< --command 'Rscript 03_depvar.R' --arg_dict '{"N":${Ns},"m":${ms}, "seed":${seeds}, "outfile":["robustness_4.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[0.3],"B0":[0.5], "outcome_formula":["y~x+z"], "prediction_accuracy":[0.85], "z_bias":[0.25,0.4]}' --outfile $@
|
||||
|
||||
robustness_4_dv_jobs_p3: grid_sweep.py 03_depvar.R simulation_base.R grid_sweep.py
|
||||
rm -f $@
|
||||
${srun} $< --command 'Rscript 03_depvar.R' --arg_dict '{"N":${Ns},"m":${ms}, "seed":${seeds}, "outfile":["robustness_4.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[0.3], "B0":[0.5], "outcome_formula":["y~x+z"], "prediction_accuracy":[0.85], "z_bias":[0.55,0.7]}' --outfile $@
|
||||
robustness_4_dv_jobs_p4: grid_sweep.py 03_depvar.R simulation_base.R grid_sweep.py
|
||||
rm -f $@
|
||||
${srun} $< --command 'Rscript 03_depvar.R' --arg_dict '{"N":${Ns},"m":${ms}, "seed":${seeds}, "outfile":["robustness_4.feather"],"y_explained_variance":${explained_variances}, "Bzy":[-0.3],"Bxy":[0.3],"Bzx":[0.3], "B0":[0.5], "outcome_formula":["y~x+z"], "prediction_accuracy":[0.85], "z_bias":[0.85,1]}' --outfile $@
|
||||
|
||||
|
||||
START=0
|
||||
END_1=$(shell cat robustness_4_dv_jobs_p1 | wc -l)
|
||||
END_2=$(shell cat robustness_4_dv_jobs_p2 | wc -l)
|
||||
END_3=$(shell cat robustness_4_dv_jobs_p3 | wc -l)
|
||||
|
||||
STEP=1000
|
||||
ONE=1
|
||||
ITEMS_1=$(shell seq $(START) $(STEP) $(END_1))
|
||||
ITEMS_2=$(shell seq $(START) $(STEP) $(END_2))
|
||||
ITEMS_3=$(shell seq $(START) $(STEP) $(END_3))
|
||||
|
||||
robustness_4_dv.feather: robustness_4_dv_jobs_p1 robustness_4_dv_jobs_p2 robustness_4_dv_jobs_p3
|
||||
$(foreach item,$(ITEMS_1),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_4_dv_jobs_p1;)
|
||||
$(foreach item,$(ITEMS_2),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_4_dv_jobs_p2;)
|
||||
$(foreach item,$(ITEMS_3),sbatch --wait --verbose --array=$(shell expr $(item) + $(ONE))-$(shell expr $(item) + $(STEP)) run_simulation.sbatch 0 robustness_4_dv_jobs_p3;)
|
||||
|
||||
#
|
||||
clean:
|
||||
|
@ -12,3 +12,29 @@ Like `robustness\_1.RDS` but with a less precise model for $w_pred$. In the mai
|
||||
|
||||
This is just example 1 with varying levels of classifier accuracy.
|
||||
|
||||
# robustness_2_dv.RDS
|
||||
|
||||
Example 3 with varying levels of classifier accuracy
|
||||
|
||||
# robustness_3.RDS
|
||||
|
||||
Example 1 with varying levels of skewness in the classified variable. The variable `Px` is the baserate of $X$ and controls the skewness of $X$.
|
||||
It probably makes more sense to report the mean of $X$ instead of `Px` in the supplement.
|
||||
|
||||
# robustness_3_dv.RDS
|
||||
|
||||
Example 3 with varying levels of skewness in the classified variable. The variable `B0` is the intercept of the main model and controls the skewness of $Y$.
|
||||
It probably makes more sense to report the mean of $Y$ instead of B0 in the supplement.
|
||||
|
||||
# robustness_4.RDS
|
||||
|
||||
Example 2 with varying amounts of differential error. The variable `y_bias` controls the amount of differential error.
|
||||
It probably makes more sense to report the correlation between $Y$ and $X-W$, or the difference in accuracy from when $Y=1$ to when $Y=0$, in the supplement instead of `y_bias`.
|
||||
|
||||
# robustness_4_dv.RDS
|
||||
|
||||
Example 4 with varying amounts of bias. The variable `z_bias` controls the amount of differential error.
|
||||
It probably makes more sense to report the correlation between $Z$ and $Y-W$, or the difference in accuracy from when $Z=1$ to when $Z=0$, in the supplement instead of `z_bias`.
|
||||
|
||||
|
||||
|
||||
|
@ -151,10 +151,10 @@ run_simulation_depvar <- function(df, result, outcome_formula=y~x+z, proxy_formu
|
||||
temp.df <- copy(df)
|
||||
temp.df[,y:=y.obs]
|
||||
mod.caroll.lik <- measerr_mle_dv(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula)
|
||||
fisher.info <- solve(mod.caroll.lik$hessian)
|
||||
fischer.info <- solve(mod.caroll.lik$hessian)
|
||||
coef <- mod.caroll.lik$par
|
||||
ci.upper <- coef + sqrt(diag(fisher.info)) * 1.96
|
||||
ci.lower <- coef - sqrt(diag(fisher.info)) * 1.96
|
||||
ci.upper <- coef + sqrt(diag(fischer.info)) * 1.96
|
||||
ci.lower <- coef - sqrt(diag(fischer.info)) * 1.96
|
||||
result <- append(result,
|
||||
list(Bxy.est.mle = coef['x'],
|
||||
Bxy.ci.upper.mle = ci.upper['x'],
|
||||
@ -299,11 +299,32 @@ run_simulation <- function(df, result, outcome_formula=y~x+z, proxy_formula=NUL
|
||||
temp.df <- copy(df)
|
||||
temp.df <- temp.df[,x:=x.obs]
|
||||
mod.caroll.lik <- measerr_mle(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula, truth_formula=truth_formula)
|
||||
fisher.info <- solve(mod.caroll.lik$hessian)
|
||||
coef <- mod.caroll.lik$par
|
||||
ci.upper <- coef + sqrt(diag(fisher.info)) * 1.96
|
||||
ci.lower <- coef - sqrt(diag(fisher.info)) * 1.96
|
||||
|
||||
## tryCatch({
|
||||
## mod.calibrated.mle <- mecor(y ~ MeasError(w_pred, reference = x.obs) + z, df, B=400, method='efficient')
|
||||
## (mod.calibrated.mle)
|
||||
## (mecor.ci <- summary(mod.calibrated.mle)$c$ci['x.obs',])
|
||||
## result <- append(result, list(
|
||||
## Bxy.est.mecor = mecor.ci['Estimate'],
|
||||
## Bxy.ci.upper.mecor = mecor.ci['UCI'],
|
||||
## Bxy.ci.lower.mecor = mecor.ci['LCI'])
|
||||
## )
|
||||
|
||||
|
||||
|
||||
fischer.info <- NA
|
||||
ci.upper <- NA
|
||||
ci.lower <- NA
|
||||
|
||||
tryCatch({fischer.info <- solve(mod.caroll.lik$hessian)
|
||||
ci.upper <- coef + sqrt(diag(fischer.info)) * 1.96
|
||||
ci.lower <- coef - sqrt(diag(fischer.info)) * 1.96
|
||||
},
|
||||
|
||||
## use <<- so the message is stored in the enclosing function's `result`; a plain <- would only modify a copy local to the handler
error=function(e) {result[['error']] <<- as.character(e)
|
||||
})
|
||||
|
||||
coef <- mod.caroll.lik$par
|
||||
|
||||
result <- append(result,
|
||||
list(Bxy.est.mle = coef['x'],
|
||||
|
Loading…
Reference in New Issue
Block a user