
remove files that we won't put on osf.

Nathan TeBlunthuis 2023-03-01 10:42:10 -08:00
parent c1dbbfd0dd
commit c02efbe381
41 changed files with 0 additions and 3367 deletions

charts/.gitignore

@@ -1,9 +0,0 @@
*.aux
*.svg
*.pdf
*.png
*.aux
*.fdb_latexmk
*.log
*.fls
auto/*


@@ -1,29 +0,0 @@
#!/usr/bin/make
all: $(patsubst %.tex,%.svg,$(wildcard *.tex)) $(patsubst %.tex,%.png,$(wildcard *.tex))
%.png: %.pdf
convert -density 300 -transparent white $< $@
%.svg: %.pdf
/usr/bin/inkscape $< --export-plain-svg --export-type=svg --export-filename=$@
%.pdf: %.tex
latexmk -f -pdf $<
clean:
latexmk -C *.tex
rm -f *.tmp
rm -f vc
rm -f *.svg
viewpdf: all
evince *.pdf
vc:
vc-git
pdf: all
.PHONY: clean all


@@ -1,46 +0,0 @@
\documentclass[12pt]{standalone}
\usepackage{ucs}
\usepackage[utf8x]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{textcomp}
\renewcommand{\rmdefault}{ugm}
\renewcommand{\sfdefault}{phv}
\usepackage[garamond]{mathdesign}
\usepackage{tikz}
\usetikzlibrary{positioning, shapes, arrows, shadows}
\begin{document}
\tikzset{
observed/.style={circle, draw},
partly observed/.style 2 args={draw, fill=#2, path picture={
\fill[#1, sharp corners] (path picture bounding box.south west) -|
(path picture bounding box.north east) -- cycle;},
circle},
unobserved/.style={draw, circle, fill=gray!40},
residual/.style={draw, rectangle}
}
\tikzset{>=latex}
\begin{tikzpicture}
\node[observed] (y) {$Y$};
\node[unobserved, above=of y] (x) {$X$};
\node[observed, left=of x] (w) {$W$};
% \node[unobserved, above=of w] (k) {$K$};
\node[observed,right=of x] (z) {$Z$};
% \node[residual,below=of y] (e) {$\varepsilon$};
% \node[residual,below=of w] (xi) {$\xi$};
\draw[->] (z) to (y);
\draw[->] (z) -- (x);
\draw[->] (x) -- (y);
\draw[->] (x) -- (w);
% \draw[->] (y) -- (w);
% \draw[->] (x) -- (xi);
% \draw[->] (w) -- (xi);
\end{tikzpicture}
\end{document}


@@ -1,47 +0,0 @@
\documentclass[12pt]{standalone}
\usepackage{ucs}
\usepackage[utf8x]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{textcomp}
\renewcommand{\rmdefault}{ugm}
\renewcommand{\sfdefault}{phv}
\usepackage[garamond]{mathdesign}
\usepackage{tikz}
\usetikzlibrary{positioning, shapes, arrows, shadows}
\begin{document}
\tikzset{
observed/.style={circle, draw},
partly observed/.style 2 args={draw, fill=#2, path picture={
\fill[#1, sharp corners] (path picture bounding box.south west) -|
(path picture bounding box.north east) -- cycle;},
circle},
unobserved/.style={draw, circle, fill=gray!40},
residual/.style={draw, rectangle}
}
\tikzset{>=latex}
\begin{tikzpicture}
\node[observed] (y) {$Y$};
\node[unobserved, above=of y] (x) {$X$};
\node[observed, left=of x] (w) {$W$};
\node[observed,right=of x] (z) {$Z$};
\draw[->] (z) to (y);
\draw[->] (z) -- (x);
\draw[->] (x) -- (y);
\draw[->] (x) -- (w);
\draw[->] (y) -- (w);
\end{tikzpicture}
\end{document}


@@ -1,42 +0,0 @@
\documentclass[12pt]{standalone}
\usepackage{ucs}
\usepackage[utf8x]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{textcomp}
\renewcommand{\rmdefault}{ugm}
\renewcommand{\sfdefault}{phv}
\usepackage[garamond]{mathdesign}
\usepackage{tikz}
\usetikzlibrary{positioning, shapes, arrows, shadows}
\begin{document}
\tikzset{
observed/.style={circle, draw},
partly observed/.style 2 args={draw, fill=#2, path picture={
\fill[#1, sharp corners] (path picture bounding box.south west) -|
(path picture bounding box.north east) -- cycle;},
circle},
unobserved/.style={draw, circle, fill=gray!40},
residual/.style={draw, rectangle}
}
\tikzset{>=latex}
\begin{tikzpicture}
\node[unobserved] (y) {$Y$};
\node[observed, above=of y] (x) {$X$};
\node[observed, left=of x] (w) {$W$};
% \node[unobserved, above=of w] (k) {$K$};
\node[observed,right=of x] (z) {$Z$};
% \node[residual,below=of y] (e) {$\varepsilon$};
% \node[residual,below=of w] (xi) {$\xi$};
\draw[->] (z) to (y);
\draw[->] (x) -- (y);
\draw[->] (y) -- (w);
\end{tikzpicture}
\end{document}


@@ -1,47 +0,0 @@
\documentclass[12pt]{standalone}
\usepackage{ucs}
\usepackage[utf8x]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{textcomp}
\renewcommand{\rmdefault}{ugm}
\renewcommand{\sfdefault}{phv}
\usepackage[garamond]{mathdesign}
\usepackage{tikz}
\usetikzlibrary{positioning, shapes, arrows, shadows}
\begin{document}
\tikzset{
observed/.style={circle, draw},
partly observed/.style 2 args={draw, fill=#2, path picture={
\fill[#1, sharp corners] (path picture bounding box.south west) -|
(path picture bounding box.north east) -- cycle;},
circle},
unobserved/.style={draw, circle, fill=gray!40},
residual/.style={draw, rectangle}
}
\tikzset{>=latex}
\begin{tikzpicture}
\node[unobserved] (y) {$Y$};
\node[partly observed={white}{gray!40}, above=of y] (x) {$X$};
\node[observed, left=of x] (w) {$W$};
% \node[unobserved, above=of w] (k) {$K$};
\node[observed,right=of x] (z) {$Z$};
% \node[residual,below=of y] (e) {$\varepsilon$};
% \node[residual,below=of w] (xi) {$\xi$};
\draw[->] (x) -- (y);
% \draw[->] (x) -- (w);
\draw[->] (y) -- (w);
% \draw[->] (k) -- (w);
\draw[->] (z) -- (y);
% \draw[->] (z) -- (k);
% \draw[->] (y) -- (xi);
% \draw[->] (w) -- (xi);
\end{tikzpicture}
\end{document}


@@ -1,35 +0,0 @@
\documentclass[12pt]{standalone}
\usepackage{ucs}
\usepackage[utf8x]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{textcomp}
\renewcommand{\rmdefault}{ugm}
\renewcommand{\sfdefault}{phv}
\usepackage[garamond]{mathdesign}
\usepackage{tikz}
\usetikzlibrary{positioning, shapes, arrows, shadows}
\begin{document}
\tikzset{
observed/.style={circle, draw},
partly observed/.style 2 args={draw, fill=#2, path picture={
\fill[#1, sharp corners] (path picture bounding box.south west) -|
(path picture bounding box.north east) -- cycle;},
circle},
unobserved/.style={draw, circle, fill=gray!40},
residual/.style={draw, rectangle}
}
\tikzset{>=latex}
\begin{tikzpicture}
\matrix [draw, below, font=\small, align=center, column sep=2\pgflinewidth, inner sep=0.4em, outer sep=0em, nodes={align=center, anchor=center}] at (current bounding box.south){
\node[observed,label=right:observed] {}; \\
\node[unobserved,label=right:automatically classified]{}; \\
\node[residual,label=right:error term]{}; \\
};
\end{tikzpicture}
\end{document}


@@ -1,43 +0,0 @@
require(tibble)
require(purrr)
.emulate_coding <- function(ground_truth, Q = 1) {
if (runif(1) > Q) {
return(sample(c(1,0), 1))
} else {
return(ground_truth)
}
}
##irr::kripp.alpha(matrix(c(obs_x, obs_x2), nrow = 2, byrow = TRUE), method = "nominal")
### Which is very close to
## cor(obs_x, obs_x2)
.sim <- function(N = 100, P = 0.5, Q = 0.8) {
real_x <- rbinom(N, 1, P)
obs_x <- purrr::map_dbl(real_x, .emulate_coding, Q = Q)
### then learn w from obs_x and k
obs_x2 <- purrr::map_dbl(real_x, .emulate_coding, Q = Q)
ra <- sum(diag(table(obs_x, obs_x2))) / N ## raw agreement
rr <- cor(obs_x, obs_x2)
irr <- irr::kripp.alpha(matrix(c(obs_x, obs_x2), nrow = 2, byrow = TRUE), method = "nominal")$value
return(data.frame(N, P, Q, ra, rr, irr))
}
N <- c(50, 100, 300)
P <- c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
Q <- c(0.5, 0.6, 0.7, 0.8, 0.9, 1)
conditions <- tibble::as_tibble(expand.grid(N, P, Q))
colnames(conditions) <- c("N", "P", "Q")
res <- list()
for (i in seq_len(nrow(conditions))) {
print(i)
res[[i]] <- purrr::map_dfr(rep(NA, 100), ~ .sim(conditions$N[i], conditions$P[i], conditions$Q[i]))
}
conditions$res <- res
require(dplyr)
conditions %>%
  mutate(mra = purrr::map_dbl(res, ~mean(.$ra, na.rm = TRUE)),
         mrr = purrr::map_dbl(res, ~mean(.$rr, na.rm = TRUE)),
         mirr = purrr::map_dbl(res, ~mean(.$irr, na.rm = TRUE))) %>%
  lm(mirr ~ 0 + P + poly(Q, 2), data = .) %>%
  summary()


@@ -1,81 +0,0 @@
.emulate_coding <- function(ground_truth, Q = 1) {
if (runif(1) > Q) {
return(sample(c(0, 1), size = 1, replace = TRUE))
} else {
return(ground_truth)
}
}
distort_gt <- function(x, Q = NULL) {
return(purrr::map_dbl(x, .emulate_coding, Q = Q))
}
N <- c(1000, 3600, 14400)
m <- c(75, 150, 300)
B0 <- c(0, 0.1, 0.3)
Bxy <- c(0.1, 0.2, 0.5)
Q <- c(.6, .8, .9)
conditions <- expand.grid(N, m, B0, Bxy, Q)
colnames(conditions) <- c("N", "m", "B0", "Bxy", "Q")
logistic <- function(x) {1/(1+exp(-1*x))}
.step <- function(i, Bxy, B0, Q, N, m) {
x <- rbinom(N, 1, 0.5)
y <- Bxy * x + rnorm(N, 0, .5) + B0
dx <- as.numeric(distort_gt(x, Q = Q))
randomidx <- sample(seq(N), m)
coder1x <- distort_gt(x[randomidx], Q = Q)
coder2x <- distort_gt(x[randomidx], Q = Q)
coding_data <- matrix(c(as.numeric(coder1x), as.numeric(coder2x)), nrow = 2, byrow = TRUE)
alpha <- irr::kripp.alpha(coding_data, method = "nominal")
estimated_q <- alpha$value^(1/2)
estimated_q2 <- alpha$value
res <- data.frame(x = as.factor(x), y = y, dx = as.factor(dx))
small_y <- y[randomidx]
small_x <- x[randomidx]
naive_mod <- glm(y~dx, data = res, x = TRUE, y = TRUE)
real_mod <- glm(y~x, data = res, x = TRUE, y = TRUE)
m1 <- glm(small_y~coder1x)
m2 <- glm(small_y~coder2x)
m3 <- glm(small_y~small_x)
correct_only_idx <- coder1x == coder2x
m4 <- glm(small_y[correct_only_idx] ~ small_x[correct_only_idx])
lab_only_gt <- coef(m3)[2]
lab_only_avg <- mean(c(coef(m1)[2], coef(m2)[2]))
lab_only_correct_only <- coef(m4)[2]
return(tibble::tibble(N, m, Q, Bxy, B0, estimated_q, naive_Bxy = as.numeric(coef(naive_mod)[2]), real_Bxy = as.numeric(coef(real_mod)[2]), lab_only_gt= lab_only_gt, lab_only_avg = lab_only_avg, lab_only_correct_only = lab_only_correct_only))
}
## res <- list()
## for (i in seq(nrow(conditions))) {
## message(i)
## res[[i]] <- purrr::map_dfr(1:100, ~.step(., conditions$Bxy[i], conditions$B0[i], conditions$Q[i], conditions$N[i], conditions$m[i]))
## }
require(furrr)
plan(multisession)
.run <- function(i, conditions) {
purrr::map_dfr(1:100, ~.step(., conditions$Bxy[i], conditions$B0[i], conditions$Q[i], conditions$N[i], conditions$m[i]))
}
res <- future_map(seq(nrow(conditions)), .run, conditions = conditions, .progress = TRUE)
##saveRDS(res, "rubin_res.RDS")
conditions <- tibble::as_tibble(conditions)
conditions$res <- res
require(tidyverse)
conditions %>%
  mutate(loco_median = purrr::map_dbl(res, ~median(.$lab_only_correct_only)),
         loco_p025 = purrr::map_dbl(res, ~quantile(.$lab_only_correct_only, probs = 0.025)),
         loco_p975 = purrr::map_dbl(res, ~quantile(.$lab_only_correct_only, probs = 0.975))) %>%
  mutate(loa_median = purrr::map_dbl(res, ~median(.$lab_only_avg)),
         loa_p025 = purrr::map_dbl(res, ~quantile(.$lab_only_avg, probs = 0.025)),
         loa_p975 = purrr::map_dbl(res, ~quantile(.$lab_only_avg, probs = 0.975))) %>%
  filter(B0 == 0.1 & Bxy == 0.5) %>%
  select(N, m, Q, starts_with("loco"), starts_with("loa")) %>%
  pivot_longer(cols = loco_median:loa_p975,
               names_to = c("type", "tile"),
               names_pattern = "(.*)_(.*)",
               values_to = "value") %>%
  pivot_wider(names_from = "tile") %>%
  ggplot(aes(x = Q, y = median, ymin = p025, ymax = p975, fill = type, col = type)) +
  geom_line() +
  geom_ribbon(alpha = 0.2) +
  facet_grid(N ~ m) +
  geom_hline(yintercept = .5, linetype = 2, col = "grey")


@@ -1,56 +0,0 @@
##install.packages(c("purrr", "simex", "irr"))
.emulate_coding <- function(ground_truth, Q = 1) {
if (runif(1) > Q) {
return(sample(c(0, 1), size = 1, replace = TRUE))
} else {
return(ground_truth)
}
}
distort_gt <- function(x, Q = NULL) {
return(purrr::map_dbl(x, .emulate_coding, Q = Q))
}
N <- c(1000, 3600, 14400)
m <- c(75, 150, 300)
B0 <- c(0, 0.1, 0.3)
Bxy <- c(0.1, 0.2, 0.5)
Q <- c(.6, .8, .9)
conditions <- expand.grid(N, m, B0, Bxy, Q)
logistic <- function(x) {1/(1+exp(-1*x))}
.step <- function(Bxy, B0, Q, N, m) {
x <- rbinom(N, 1, 0.5)
y <- Bxy * x + rnorm(N, 0, .5) + B0
dx <- as.numeric(distort_gt(x, Q = Q))
randomx <- sample(x, m)
coder1x <- distort_gt(randomx, Q = Q)
coder2x <- distort_gt(randomx, Q = Q)
coding_data <- matrix(c(as.numeric(coder1x), as.numeric(coder2x)), nrow = 2, byrow = TRUE)
alpha <- irr::kripp.alpha(coding_data, method = "nominal")
estimated_q <- alpha$value^(1/2)
estimated_q2 <- alpha$value
res <- data.frame(x = as.factor(x), y = y, dx = as.factor(dx))
naive_mod <- glm(y~dx, data = res, x = TRUE, y = TRUE)
real_mod <- glm(y~x, data = res, x = TRUE, y = TRUE)
px <- matrix(c(estimated_q, 1-estimated_q, 1-estimated_q, estimated_q), nrow = 2)
colnames(px) <- levels(res$dx)
corrected_mod <- simex::mcsimex(naive_mod, SIMEXvariable = "dx", mc.matrix = px, jackknife.estimation = FALSE, B = 300)
px2 <- matrix(c(estimated_q2, 1-estimated_q2, 1-estimated_q2, estimated_q2), nrow = 2)
colnames(px2) <- levels(res$dx)
corrected_mod2 <- simex::mcsimex(naive_mod, SIMEXvariable = "dx", mc.matrix = px2, jackknife.estimation = FALSE, B = 300)
return(tibble::tibble(N, m, Q, Bxy, B0, estimated_q, naive_Bxy = as.numeric(coef(naive_mod)[2]), real_Bxy = as.numeric(coef(real_mod)[2]), corrected_Bxy = coef(corrected_mod)[2], corrected_Bxy2 = coef(corrected_mod2)[2]))
}
## res <- .step(0.2, 0, 0.8, N = 1000, m = 100)

paper

@@ -1 +0,0 @@
Subproject commit b135cac19e336001401d3ad533a12bdceab913ac


@@ -1,291 +0,0 @@
ls()
weight
weight
lablr
labelr
nrow(labelr)
names(labelr)
names(labelr$data)
labelr$data
labelr
names(labelr)
labelr$labelr
labelr$toxic
setwd("..")
q()
n
summary(toxicity_calibrated)
qplot(labelr$toxic,type='hist')
names(labelr)
labelr$n
labelr
names(labelr)
fbyg
gghist(fbyg$weight)
hist(fbyg$weight)
hist(log(fbyg$weight))
fbyg$weight==1
all(fbyg$weight==1)
fbyg$weight[fbyg$weight != 1]
fbyg[fbyg$weight != 1]
fbyg[,fbyg$weight != 1]
fbyg[[fbyg$weight != 1]]
fbyg[fbyg$weight != 1,]
names(labelr)
summary(toxicity_calibrated)
toxicity_calibrated
val.data
names(labelr)
labelr$data
labelr
labelr[data]
labelr[data=='yg']
labelr[,data=='yg']
labelr[data=='yg',]
labelr[labelr$data=='yg']
labelr[,labelr$data=='yg']
labelr[labelr$data=='yg',]
toxicity_calibrated
summary(toxicity_calibrated)
yg3
yg3[,['toxic','toxic_pred']]
yg3 %>% select('toxic','toxic_pred')
yg3 |> select('toxic','toxic_pred')
names(yg3)
yg3[,c('toxic_pred','toxic')]
corr(yg3[,c('toxic_pred','toxic')])
cor(yg3[,c('toxic_pred','toxic')])
cor(yg3[,c('toxic_pred','toxic')],na.rm=T)
cor(yg3[,c('toxic_pred','toxic')],rm.na=T)
?cor(yg3[,c('toxic_pred','toxic')],use=
?cor
cor(yg3[,c('toxic_pred','toxic')],use='all.obs')
?cor
cor(yg3[,c('toxic_pred','toxic')],use='complete.obs')
cor(yg3[,c('toxic_pred','toxic')],use='complete.obs',method='spearman')
?predict
yg3$toxic_pred
names(preds)
preds
preds
preds$error
preds
preds
summary(errormod)
summary(errormod)
summary(preds)
names(preds)
preds
resids
qplot(resids)
resids
?predict.lm
dnorm(1)
dnorm(2)
dnorm(1)
pnorm(1)
preds
p1 + p2
p1 + p2
p1
p2
preds
preds1 <- preds
preds1$diff - preds$diff
preds1$diff
preds1$diff - preds1$diff
preds1$diff - preds$diff
preds1$diff - preds$diff
preds1$diff - preds$diff
preds1$diff - preds$diff
preds1
preds
dnorm(-1)
dnorm(1)
pnorm(1)
pnorm(-1)
pnorm(2)
pnorm(9)
pnorm(6)
pnorm(2)
dnorm(0.95)
qnorm(0.95)
qnorm(0.841)
fulldata_preds
names(yg3)
yg3$toxic_feature_1
yg3$toxic_feature_2
yg3
yg3[,.(toxic_pred,toxic_var)]
yg3[,.(toxic_pred,toxicity_2_pred_sigma,toxicity_1_pred_sigma)]
yg3[,.(toxic_pred,toxicity_2_pred_sigma,toxicity_1_pred_sigma,cov(toxicity_2_pred,toxicity_1_pred))]
cov(1,2)
cov(c(1),c(3))
cov(c(1),c(3,2))
cov(c(1,1),c(3,2))
cov(c(1,2),c(3,2))
covterm
covterm
?cov
covterm
yg3
yg3[,.(toxic_pred,toxicity_2_pred_sigma,toxicity_1_pred_sigma,cov(toxicity_2_pred,toxicity_1_pred))]
yg3[,.(toxic_pred,toxicity_2_pred_sigma,toxicity_1_pred_sigma,toxic_var)]
yg3[,.(toxic_pred,toxicity_2_pred_sigma,toxicity_1_pred_sigma,toxic_var,toxic_sd)]
yg3
names(yg3)
print(sg)
print(sg)
1+1
library(stargazer)
stargazer(w1,w2,w3,w4,w5,t1,t2,t3,t4,t5, type="text",
keep = c("cond1","meantox","cond1:meantox","Constant"),
keep.stat=c("n","adj.rsq"),
model.numbers = F,
dep.var.labels = c("DV = Willingness to comment","DV = Toxicity of YG respondent comments"),
covariate.labels = c("Treatment (top comments shown)",
"Average toxicity of top comments",
"Treatment $\times$ top comments toxicity",
"Constant"),
add.lines = list(c("Article fixed effects","No","No","No","Yes","Yes","No","No","No","Yes","Yes")),
star.cutoffs = c(0.05,0.01,0.005),
notes = "Standard errors are clustered at the respondent level.",
column.labels = c("(1)","(2)","(3)","(4)","(5)","(6)","(7)","(8)","(9)","(10)"),
style = "apsr")
q()
n
yglabels
labelr
names(labelr)
fb
names(fb
)
fb.comment_id
fb['comment_id']
fb[,'comment_id']
labelr[,'comment_id']
names(fb)
fb.labeled
names(fb.labeled)
names(yg)
?amelia
yg
names(yg)
names(yg3)
?rbind
nrow(yg3)
nrow(yg)
yg3[,.(.N),by=.(toxic,fb)]
yg3.toimpute
names(yg3.toimpute)
yg3.toimpute
names(yg3.toimpute)
names(labelr)
nrow(yg3)
nrow(labelr)
?merge.data.table
labelr
is.data.table(labelr)
yg3.toimpute
overimp.grid
overimp.grid
?amelia
q()
n
setwd("presentations/ica_hackathon_2022/")
ls()
attach(r)
example_2_B.plot.df
library(ggplot2)
example_2_B.plot.df[(variable=='x') && (m < 1000)]
example_2_B.plot.df[(variable=='x') && (m < 1000)]
theme_set(theme_default())
theme_set(theme_minimal())
theme_set(theme_classic())
example_2_B.plot.df[(variable=='x') && (m < 1000)]
example_2_B.plot.df[(variable=='x') && (m < 1000),unique(method)]
as.factor
update.packages()
update.packages()
update.packages()
cancel
plot.df
example_2_B.plot.df
plot.df
example_2_B.plot.df
example_2_B.plot.df$method %>% unique
example_2_B.plot.df$method |> unique
example_2_B.plot.df$method |> uniq
unique(example_2_B.plot.df$method)
example_2_B.plot.df$method
example_2_B.plot.df$method
example_2_B.plot.df$method
example_2_B.plot.df$method
example_2_B.plot.df <- r$example_2_B.plot.df
q()
n
setwd("presentations/ica_hackathon_2022/')
setwd("presentations/ica_hackathon_2022/")
example_2_B.plot.df$method
example_2_B.plot.df$method
q()
n
example_2_B.plot.df$method
example_2_B.plot.df$method
q()
n
example_2_B.plot.df$method
example_2_B.plot.df$method
q()
n
q()
n
plot.df
plot.df
plot.df[,.N,by=.(N,m)]
plot.df[,.N,by=.(N,m,method)]
plot.df[variable=='x',.N,by=.(N,m,method)]
plot.df
plot.df[(variable=='x') & (m < 1000) & (!is.na(p.true.in.ci))]
plot.df[(variable=='x') & (m != 1000) & (!is.na(p.true.in.ci))]
plot.df
?label_wrap_gen
install.packages("ggplot2")
devtools::install_github("tidyverse/ggplot2")
2
library(ggplot2)
ggplot2::version
sessioninfo()
sessionInfo()
q()
n
sessionInfo()
?scale_x_discrete
?facet_grid
plot.df
plot.df
plot.df[method="2SLS+gmm"]
plot.df[method=="2SLS+gmm"]
df <- example_2_B.plot.df
df
q()
n
plot.df
plot.df[m=50]
plot.df[m==50]
plot.df.example.2[m==50][method=2SLS+gmm]
plot.df.example.2[m==50][method==2SLS+gmm]
plot.df.example.2[(m==50) & (method==2SLS+gmm)]
plot.df.example.2[(m==50) & (method=="2SLS+gmm")]
plot.df[m==50]
plot.df.example.3
plot.df.example.3
plot.df.example.3[N=25000]
plot.df.example.3[N==25000]
plot.df
plot.df
plot.df
q()
n


@@ -1,26 +0,0 @@
#!/usr/bin/make
all:html pdf
html: $(patsubst %.Rmd,%.html,$(wildcard *.Rmd))
pdf: $(patsubst %.Rmd,%.pdf,$(wildcard *.Rmd))
remembr.RDS:
rsync klone:/gscratch/comdata/users/nathante/ml_measurement_error/mi_simulations/remembr.RDS .
%.pdf: %.html
Rscript -e 'xaringan::decktape("$<","$@",docker=FALSE)'
%.html: %.Rmd *.css remembr.RDS
Rscript -e 'library(rmarkdown); rmarkdown::render("$<", output_file = "$@")'
# firefox "$@"
clean:
rm -f *.html
rm -rf *_files
rm -rf *_cache
publish: all pdf
scp -r *.html groc:/home/nathante/public_html/slides/measurement_error_comdatahack_2022.html
scp -r *.pdf groc:/home/nathante/public_html/slides/measurement_error_comdatahack_2022.pdf
.PHONY: clean all

File diff suppressed because one or more lines are too long


@@ -1,724 +0,0 @@
---
title: "How good of a model do you need? Accounting for classification errors in machine assisted content analysis."
author: Nathan TeBlunthuis
date: May 24 2022
template: "../resources/template.html"
output:
  xaringan::moon_reader:
    lib_dir: libs
    seal: false
    nature:
      highlightStyle: github
      ratio: 16:9
      countIncrementalSlides: true
      slideNumberFormat: |
        <div class="progress-bar-container">
          <div class="progress-bar" style="width: calc(%current% / %total% * 100%);">
          </div>
        </div>
    self_contained: false
    css: [default, my-theme.css, fontawesome.min.css]
    chakra: libs/remark-latest.min.js
---
```{r echo=FALSE, warning=FALSE, message=FALSE}
library(knitr)
library(ggplot2)
library(data.table)
library(icons)
f <- function (x) {formatC(x, format="d", big.mark=',')}
theme_set(theme_bw())
r <- readRDS('remembr.RDS')
attach(r)
```
class: center, middle, narrow
<script type='javascript'>
window.MathJax = {
loader: {load: ['[tex]/xcolor']},
tex: {packages: {'[+]': ['xcolor']}}
};
</script>
<div class="my-header"></div>
### .title-heading[Unlocking the power of big data: The importance of measurement error in machine assisted content analysis]
## Nathan TeBlunthuis
<img src="images/nu_logo.png" height="170px" style="padding:21px"/> <img src="images/uw_logo.png" height="170px" style="padding:21px"/> <img src="images/cdsc_logo.png" height="170px" style="padding:21px"/>
`r icons::fontawesome('envelope')` nathan.teblunthuis@northwestern.edu
`r icons::fontawesome('globe')` [https://teblunthuis.cc](https://teblunthuis.cc)
???
This talk will be me presenting my "lab notebook" and not a polished research talk. Maybe it would be a good week of a graduate seminar? In sum, machine assisted content analysis has unique limitations and threats to validity that I wanted to understand better. I've learned how the noise introduced by predictive models can result in misleading statistical inferences, but that a sample of human-labeled validation data can often be used to account for this noise and obtain accurate inferences in the end. Statistical knowledge of this problem and computational tools for addressing it are still in development. My goals for this presentation are to start sharing this information with the community and hopefully to stimulate us to work on extending existing approaches or using them in our work.
This is going to be a boring talk about some *very* technical material. If you're not that interested, please return to your hackathon. Please interrupt me if I'm going too fast for you or if you don't understand something. I will try to move quickly in the interests of those wishing to wrap up their hackathon projects. I will also ask you to show hands once or twice to see whether you are already familiar with some concepts that it might be expedient to skip.
---
class:center, middle, inverse
## Machine assisted content analysis (MACA)
???
I'm going to start by defining a study design that is increasingly common, especially in Communication and Political Science, but also across the social sciences and beyond. I call it *machine assisted content analysis* (MACA).
---
<div class="my-header"></div>
### .border[Machine assisted content analysis (MACA) uses machine learning for scientific measurement.]
.emph[Content analysis:] Statistical analysis of variables measured by human labeling ("coding") of content. This might be simple categorical labels, or maybe more advanced annotations.
--
*Downside:* Human labeling is *a lot* of work.
--
.emph[Machine assisted content analysis:] Use a *predictive algorithm* (often trained on human-made labels) to measure variables for use in a downstream *primary analysis.*
--
*Downside:* Algorithms can be *biased* and *inaccurate* in ways that could invalidate the statistical analysis.
???
A machine assisted content analysis can be part of a more complex or more powerful study design (e.g., an experiment, time series analysis &c).
---
<!-- <div class="my-header"></div> -->
<!-- ### .border[Hypothetical Example: Predicting Racial Harassment in Social Media Comments] -->
---
class:large
<div class="my-header"></div>
### .border[How can MACA go wrong?]
Algorithms can be *biased* and *error prone* (*noisy*).
--
Predictor bias is a potentially difficult problem that requires causal inference methods. I'll focus on *noise* for now.
--
Noise in the predictive model introduces bias in the primary analysis.
--
.indent[We can reduce and sometimes even *eliminate* this bias introduced by noise.]
---
layout:true
<div class="my-header"></div>
### .border[Example 1: An unbiased, but noisy classifier]
.large[.left-column[![](images/example_1_dag.png)]]
???
Please show hands if you are familiar with causal graphs or Bayesian networks. Should I explain what this diagram means?
---
.right-column[
$x$ is *partly observed* because we have *validation data* $x^*$.
]
---
.right-column[
$x$ is *partly observed* because we have *validation data* $x^*$.
$k$ are the *features* used by the *predictive model* $g(k)$.
]
---
.right-column[
$x$ is *partly observed* because we have *validation data* $x^*$.
$k$ are the *features* used by the *predictive model* $g(k)$.
The predictions $w$ are a *proxy variable* $g(k) = \hat{x} = w$.
]
---
.right-column[
$x$ is *partly observed* because we have *validation data* $x^*$.
$k$ are the *features* used by the *predictive model* $g(k)$.
The predictions $w$ are a *proxy variable* $g(k) = \hat{x} = w$.
$x = w + \xi$ because the predictive model makes errors.
]
---
layout:true
<div class="my-header"></div>
### .border[Noise in a *covariate* creates *attenuation bias*.]
.large[.left-column[![](images/example_1_dag.png)]]
---
.right-column[
We want to estimate, $y = Bx + \varepsilon$, but we estimate $y = Bw + \varepsilon$ instead.
$x = w + \xi$ because the predictive model makes errors.
]
---
.right-column[
We want to estimate, $y = Bx + \varepsilon$, but we estimate $y = Bw + \varepsilon$ instead.
$x = w + \xi$ because the predictive model makes errors.
Assume $g(k)$ is *unbiased* so $E(\xi)=0$. Also assume error is *nondifferential* so $E(\xi y)=0$:
]
---
.right-column[
We want to estimate, $y = Bx + \varepsilon$, but we estimate $y = Bw + \varepsilon$ instead.
$x = w + \xi$ because the predictive model makes errors.
Assume $g(k)$ is *unbiased* so $E(\xi)=0$. Also assume error is *nondifferential* so $E(\xi y)=0$:
$$\widehat{B_w}^{ols}=\frac{\sum^n_{j=1}{(x_j + \xi_j - \overline{(x + \xi)})}(y_j - \bar{y})}{\sum_{j=1}^n{(x_j + \xi_j - \overline{(x+\xi)})^2}} = \frac{\sum^n_{j=1}{(x_j - \bar{x})(y_j - \bar{y})}}{\sum_{j=1}^n{(x_j + \xi_j - \bar{x}){^2}}}$$
]
---
.right-column[
We want to estimate, $y = Bx + \varepsilon$, but we estimate $y = Bw + \varepsilon$ instead.
$x = w + \xi$ because the predictive model makes errors.
Assume $g(k)$ is *unbiased* so $E(\xi)=0$. Also assume error is *nondifferential* so $E(\xi y)=0$:
$$\widehat{B_w}^{ols}=\frac{\sum^n_{j=1}{(x_j + \xi_j - \overline{(x + \xi)})}(y_j - \bar{y})}{\sum_{j=1}^n{(x_j + \xi_j - \overline{(x+\xi)})^2}} = \frac{\sum^n_{j=1}{(x_j - \bar{x})(y_j - \bar{y})}}{\sum_{j=1}^n{(x_j + \color{red}{\xi_j} - \bar{x})\color{red}{^2}}}$$
In this scenario, it's clear that $\widehat{B_w}^{ols} < B_x$.
]
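To make the attenuation concrete, here is a quick simulation sketch (not from the original deck; the effect size and noise scale are made up, and the proxy is constructed so that $x = w + \xi$ with $E(\xi)=0$ and $E(\xi y)=0$):

```r
## Illustrative attenuation-bias simulation (assumed parameter values).
set.seed(1)
n  <- 10000
Bx <- 0.5                      # true effect of x on y (made up)
x  <- rnorm(n)                 # true covariate
xi <- rnorm(n)                 # mean-zero, nondifferential classifier error
w  <- x - xi                   # noisy proxy, so x = w + xi
y  <- Bx * x + rnorm(n)
coef(lm(y ~ x))["x"]           # recovers roughly 0.5
coef(lm(y ~ w))["w"]           # attenuated to roughly Bx * var(x) / var(w) = 0.25
```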
???
Please raise your hands if you're familiar with attenuation bias. I expect that it's covered in some graduate stats classes, but not universally.
---
class:large
layout:false
<div class="my-header"></div>
### .border[Beyond attenuation bias]
.larger[Measurement error can threaten validity because:]
- Attenuation bias *spreads* (e.g., to marginal effects as illustrated later).
--
- Measurement error can be *differential*: not distributed evenly and possibly correlated with $x$, $y$, or $\varepsilon$.
--
- *Bias can be away from 0* in GLMs and nonlinear models or if measurement error is differential.
--
- *Confounding* if the *predictive model is biased*, introducing a correlation between the measurement error and the residuals $(E[\xi\varepsilon] \ne 0)$.
---
class:large
layout:false
<div class="my-header"></div>
### .border[Correcting measurement error]
There's a vast literature in statistics on measurement error. Mostly about noise you'd find in sensors. Lots of ideas. No magic bullets.
--
I'm going to briefly cover 3 different approaches: *multiple imputation*, *regression calibration* and *2SLS+GMM*.
--
These all depend on *validation data*. I'm going to ignore where this comes from, but assume it's a random sample of the hypothesis testing dataset.
--
You can *and should* use it to improve your statistical estimates.
---
<div class="my-header"></div>
### .border[Multiple Imputation (MI) treats Measurement Error as a Missing Data Problem]
1. Use validation data to estimate $f(x|w,y)$, a probabilistic model of $x$.
--
2. *Sample* $m$ datasets from $\widehat{f(x|w,y)}$.
--
3. Run your analysis on each of the $m$ datasets.
--
4. Average the results from the $m$ analyses using Rubin's rules.
--
.e[Advantages:] *Very flexible!* Can sometimes work even if the predictor $g(k)$ is biased. Good R packages (**`{Amelia}`**, `{mi}`, `{mice}`, `{brms}`).
--
.e[Disadvantages:] Results depend on the quality of $\widehat{f(x|w,y)}$; may require more validation data; computationally expensive, statistically inefficient, and doesn't seem to benefit much from larger datasets.
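As a rough sketch of these four steps in base R (not the author's code: the `validation` and `full` data frames, the imputation model, and $M=20$ are assumptions, and a proper analysis would also draw the imputation-model parameters, as `{Amelia}` or `{mice}` do):

```r
## Sketch of MI for a misclassified binary covariate (illustrative only).
## Assumes: `validation` has true x plus w and y; `full` has w and y, with x NA
## outside the validation sample.
imp_mod <- glm(x ~ w + y, family = binomial, data = validation)  # 1. f(x | w, y)

M <- 20
est <- se2 <- numeric(M)
for (i in seq_len(M)) {
  p_x <- predict(imp_mod, newdata = full, type = "response")
  full$x_imp <- ifelse(is.na(full$x), rbinom(nrow(full), 1, p_x), full$x)  # 2. sample a dataset
  fit <- lm(y ~ x_imp, data = full)                                        # 3. primary analysis
  est[i] <- coef(fit)["x_imp"]
  se2[i] <- vcov(fit)["x_imp", "x_imp"]
}

B_pooled <- mean(est)                              # 4. Rubin's rules: pooled estimate...
V_total  <- mean(se2) + (1 + 1 / M) * var(est)     #    ...and total variance
```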
---
### .border[Regression calibration directly adjusts for attenuation bias.]
1. Use validation data to estimate the errors $\hat{\xi}$.
--
2. Use $\hat{\xi}$ to correct the OLS estimate.
--
3. Correct the standard errors using MLE or bootstrapping.
--
.e[Advantages:] Simple, fast.
--
.e[Disadvantages:] Limited to OLS models. Requires an unbiased predictor $g(k)$. R support (`{mecor}` R package) is pretty new.
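A bare-bones version of the substitution form of regression calibration, as a sketch with assumed `validation` and `full` data frames rather than the `{mecor}` interface:

```r
## Sketch of regression calibration (illustrative; {mecor} automates this).
calib <- lm(x ~ w, data = validation)        # 1. estimate E[x | w] from validation data
full$x_hat <- predict(calib, newdata = full)
fit <- lm(y ~ x_hat, data = full)            # 2. substitute calibrated values in the OLS model
coef(fit)["x_hat"]
## 3. The naive standard errors from `fit` ignore the calibration step;
##    bootstrap the whole two-step procedure (or use MLE) to correct them.
```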
---
layout:true
### .border[2SLS+GMM is designed for this specific problem]
.left-column[![](images/Fong_Taylor.png)]
*Regression calibration with a trick.*
---
.right-column[
1. Estimate $x = w + \xi$ to obtain $\hat{x}$. (First-stage LS).
]
---
.right-column[
1. Estimate $x = w + \xi$ to obtain $\hat{x}$. (First-stage LS).
2. Estimate $y = B^{2sls}\hat{x} + \varepsilon^{2sls}$. (Second-stage LS / regression calibration).
]
---
.right-column[
1. Estimate $x = w + \xi$ to obtain $\hat{x}$. (First-stage LS).
2. Estimate $y = B^{2sls}\hat{x} + \varepsilon^{2sls}$. (Second-stage LS / regression calibration).
3. Estimate $y = B^{val}x^* + \varepsilon^{val}$. (Validation dataset model).
]
---
.right-column[
1. Estimate $x = w + \xi$ to obtain $\hat{x}$. (First-stage LS).
2. Estimate $y = B^{2sls}\hat{x} + \varepsilon^{2sls}$. (Second-stage LS / regression calibration).
3. Estimate $y = B^{val}x^* + \varepsilon^{val}$. (Validation dataset model).
4. Combine $B^{val}$ and $B^{2sls}$ using the generalized method of moments (GMM).
]
---
.right-column[
1. Estimate $x = w + \xi$ to obtain $\hat{x}$. (First-stage LS).
2. Estimate $y = B^{2sls}\hat{x} + \varepsilon^{2sls}$. (Second-stage LS / regression calibration).
3. Estimate $y = B^{val}x^* + \varepsilon^{val}$. (Validation dataset model).
4. Combine $B^{val}$ and $B^{2sls}$ using the generalized method of moments (GMM).
Advantages: Accurate. Sometimes robust even if the predictor $g(k)$ is biased. In theory, flexible to any model that can be fit using GMM.
]
---
.right-column[
1. Estimate $x = w + \xi$ to obtain $\hat{x}$. (First-stage LS).
2. Estimate $y = B^{2sls}\hat{x} + \varepsilon^{2sls}$. (Second-stage LS / regression calibration).
3. Estimate $y = B^{val}x^* + \varepsilon^{val}$. (Validation dataset model).
4. Combine $B^{val}$ and $B^{2sls}$ using the generalized method of moments (GMM).
Advantages: Accurate. Sometimes robust even if the predictor $g(k)$ is biased. In theory, flexible to any model that can be fit using GMM.
Disadvantages: Implementation (`{predictionError}`) is new. API is cumbersome and only supports linear models. Not robust if $E(w\varepsilon) \ne 0$. GMM may be unfamiliar to audiences.
]
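The first three steps can be written directly with `lm()`. This sketch assumes a `validation` data frame with true labels `x` and a `full` data frame with classifier scores `w`; the GMM combination in step 4 is what `{predictionError}` provides and is only indicated in a comment:

```r
## Sketch of the 2SLS stages (illustrative; step 4 needs GMM as in {predictionError}).
stage1  <- lm(x ~ w, data = validation)            # 1. first-stage LS on validation data
full$x_hat <- predict(stage1, newdata = full)
stage2  <- lm(y ~ x_hat, data = full)              # 2. second-stage LS / regression calibration
val_mod <- lm(y ~ x, data = validation)            # 3. validation-only model
c(B_2sls = unname(coef(stage2)["x_hat"]),
  B_val  = unname(coef(val_mod)["x"]))
## 4. Combine B_2sls and B_val with GMM, accounting for the overlap between
##    the validation rows and the full sample (not shown here).
```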
---
layout:false
### .border[Testing attenuation bias correction]
<div class="my-header"></div>
I've run simulations to test these approaches in several scenarios.
I simulate random data, fit 100 models and plot the average estimate and its variance.
The model is not very good: about 70% accurate.
Most plausible scenario:
$y$ is continuous and normal-ish.
--
$x$ is binary (human labels) $P(x)=0.5$.
--
$w$ is the *continuous predictor* (e.g., probability) output of $g(k)$ (not binary predictions).
--
If $w$ is binary, most methods struggle, but regression calibration and 2SLS+GMM can do okay.
---
layout:false
### .border[Example 1: estimator of the effect of x]
.right-column[
```{r echo=FALSE, message=FALSE, warning=FALSE, result='asis', dev='svg', fig.width=7.5, fig.asp=.625,cache=F}
#plot.df <-
plot.df <- plot.df.example.1[,':='(method=factor(method,levels=c("Naive","Multiple imputation", "Multiple imputation (Classifier features unobserved)","Regression Calibration","2SLS+gmm","Feasible"),ordered=T),
N=factor(N),
m=factor(m))]
plot.df <- plot.df[(variable=='x') & (m != 1000) & (m!=500) & (N!=10000) & !is.na(p.true.in.ci) & (method!="Multiple imputation (Classifier features unobserved)")]
p <- ggplot(plot.df, aes(y=mean.est, ymax=mean.est + var.est/2, ymin=mean.est-var.est/2, x=method))
p <- p + geom_hline(aes(yintercept=0.2),linetype=2)
p <- p + geom_pointrange() + facet_grid(m~N,as.table=F) + scale_x_discrete(labels=label_wrap_gen(4))
print(p)
# get gtable object
```
]
.left-column[
All methods work in this scenario
Multiple imputation is inefficient.
]
---
### .border[What about bias?]
.left-column[
.large[![](images/example_2_dag.png)]
]
.right-column[
A few notes on this scenario.
$B_x = 0.2$, $B_g=-0.2$ and $sd(\varepsilon)=3$. So the signal-to-noise ratio is high.
$r$ can be conceived of as a missing feature in the predictive model $g(k)$ that is also correlated with $y$.
For example $r$ might be the *race* of a commenter, $x$ could be *racial harassment*, $y$ whether the commenter gets banned, and $k$ only has textual features but human coders can see user profiles to learn $r$.
]
---
layout:false
### .border[Example 2: Estimates of the effect of x ]
.center[
```{r echo=FALSE, message=FALSE, warning=FALSE, result='asis', dev='svg', fig.width=8, fig.asp=.625,cache=F}
#plot.df <-
plot.df <- plot.df.example.2B[,':='(method=factor(method,levels=c("Naive","Multiple imputation", "Multiple imputation (Classifier features unobserved)","Regression Calibration","2SLS+gmm","Feasible"),ordered=T),
N=factor(N),
m=factor(m))]
plot.df <- plot.df[(variable=='x') & (m != 1000) & (m!=500) & (N!=10000) & !is.na(p.true.in.ci) & (method!="Multiple imputation (Classifier features unobserved)")]
p <- ggplot(plot.df, aes(y=mean.est, ymax=mean.est + var.est/2, ymin=mean.est-var.est/2, x=method))
p <- p + geom_hline(aes(yintercept=0.2),linetype=2)
p <- p + geom_pointrange() + facet_grid(m~N,as.table=F) + scale_x_discrete(labels=label_wrap_gen(4))
print(p)
# get gtable object
```
]
---
layout:false
### .border[Example 2: Estimates of the effect of r]
.center[
```{r echo=FALSE, message=FALSE, warning=FALSE, result='asis', dev='svg', fig.width=8, fig.asp=.625,cache=F}
#plot.df <-
plot.df <- plot.df.example.2B[,':='(method=factor(method,levels=c("Naive","Multiple imputation", "Multiple imputation (Classifier features unobserved)","Regression Calibration","2SLS+gmm","Feasible"),ordered=T),
N=factor(N),
m=factor(m))]
plot.df <- plot.df[(variable=='g') & (m != 1000) & (m!=500) & (N!=10000) & !is.na(p.true.in.ci) & (method!="Multiple imputation (Classifier features unobserved)")]
p <- ggplot(plot.df, aes(y=mean.est, ymax=mean.est + var.est/2, ymin=mean.est-var.est/2, x=method))
p <- p + geom_hline(aes(yintercept=-0.2),linetype=2)
p <- p + geom_pointrange() + facet_grid(m~N,as.table=F) + scale_x_discrete(labels=label_wrap_gen(4))
print(p)
```
]
---
layout:false
class:large
###.border[Takeaways from example 2]
Bias in the predictive model creates bias in hypothesis tests.
--
Bias can be corrected *in this case*.
--
The next scenario has bias that's more tricky.
--
Multiple imputation helps, but doesn't fully correct the bias.
---
layout:false
### .border[When will GMM+2SLS fail?]
.large[.left-column[![](images/example_3_dag.png)]]
.right-column[The catch with GMM:
.emph[Exclusion restriction:] $E[w \varepsilon] = 0$.
The restriction is violated if a variable $U$ causes both $K$ and $Y$ and $X$ causes $K$ (not vice versa).
]
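A small simulation sketch (made-up data-generating process) of what such a violation does: an unobserved $U$ drives both the classifier's features and $y$, so the proxy is correlated with the regression error and the 2SLS stages inherit that correlation.

```r
## Illustrative exclusion-restriction violation (assumed parameter values).
set.seed(3)
n <- 20000
u <- rnorm(n)                          # unobserved U affecting both features and y
x <- rnorm(n)
w <- x + 0.8 * u + rnorm(n, sd = 0.5)  # classifier output leans on a U-driven feature
y <- 0.2 * x + 0.8 * u + rnorm(n)      # so E[w * error] != 0
val <- sample(n, 500)                  # validation sample with true x

stage1 <- lm(x[val] ~ w[val])
x_hat  <- cbind(1, w) %*% coef(stage1)
coef(lm(y ~ x_hat))[2]                 # biased well away from 0.2
coef(lm(y[val] ~ x[val]))[2]           # validation-only estimate stays near 0.2 (but noisy)
```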
???
GMM optimizes a model to a system of equations of which the exclusion restriction is one. So if that assumption isn't true it will be biased.
This is a different assumption than that of OLS or GLM models.
---
layout:false
### .border[Example 3: Estimates of the effect of x]
.center[
```{r echo=FALSE, message=FALSE, warning=FALSE, result='asis', dev='svg', fig.width=8, fig.asp=.625,cache=F}
#plot.df <-
plot.df <- plot.df.example.3[,':='(method=factor(method,levels=c("Naive","Multiple imputation", "Multiple imputation (Classifier features unobserved)","Regression Calibration","2SLS+gmm","Feasible"),ordered=T),
N=factor(N),
m=factor(m))]
plot.df <- plot.df[(variable=='x') & (m != 1000) & (m!=500) & (N!=10000) & (method!="Multiple imputation (Classifier features unobserved)")]
p <- ggplot(plot.df, aes(y=mean.est, ymax=mean.est + var.est/2, ymin=mean.est-var.est/2, x=method))
p <- p + geom_hline(aes(yintercept=0.2),linetype=2)
p <- p + geom_pointrange() + facet_grid(m~N,as.table=F) + scale_x_discrete(labels=label_wrap_gen(4))
print(p)
```
]
---
### .border[Takeaways]
- Attenuation bias can be a big problem with noisy predictors—leading to small and biased estimates.
- For more general hypothesis tests or if the predictor is biased, measurement error can lead to false discovery.
- It's fixable with validation data—you may not need that much and you should already be getting it.
- This means it can be okay to use poor predictors for hypothesis testing.
- The ecosystem is underdeveloped, but a lot of methods have been researched.
- Take advantage of machine learning + big data and get precise estimates when the signal-to-noise ratio is high!
---
layout:false
### .border[Future work: Noise in the *outcome*]
I've been focusing on noise in *covariates.* What if the predictive algorithm is used to measure the *outcome* $y$?
--
This isn't a problem in the simplest case (linear regression with homoskedastic errors). Noise in $y$ is projected into the error term.
--
Noise in the outcome is still a problem if errors are heteroskedastic and for GLMs / non-linear regression (e.g., logistic regression).
--
Multiple imputation (in theory) could help here. The other methods aren't designed for this case.
--
Solving this problem could be an important methodological contribution with a very broad impact.
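A quick sketch (not from the original deck; effect sizes and the 10% flip rate are made up) of the contrast between the two cases:

```r
## Noise in a continuous outcome vs. misclassification of a binary outcome.
set.seed(2)
n <- 50000
x <- rnorm(n)

y <- 0.3 * x + rnorm(n)                         # continuous outcome, true slope 0.3
y_noisy <- y + rnorm(n)                         # extra noise lands in the error term
coef(lm(y_noisy ~ x))["x"]                      # still ~0.3, just less precise

y_bin <- rbinom(n, 1, plogis(0.3 * x))          # binary outcome, true logit slope 0.3
flip  <- rbinom(n, 1, 0.1)                      # flip 10% of labels at random
y_obs <- ifelse(flip == 1, 1 - y_bin, y_bin)
coef(glm(y_bin ~ x, family = binomial))["x"]    # ~0.3
coef(glm(y_obs ~ x, family = binomial))["x"]    # attenuated toward 0
```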
---
# .border[Questions?]
Links to slides:[html](https://teblunthuis.cc/~nathante/slides/ecological_adaptation_ica_2022.html) [pdf](https://teblunthuis.cc/~nathante/slides/ecological_adaptation_ica_2022.pdf)
Link to a messy git repository:[https://code.communitydata.science/ml_measurement_error_public.git](https://code.communitydata.science/ml_measurement_error_public.git)
`r icons::fontawesome("envelope")` nathan.teblunthuis@northwestern.edu
`r icons::fontawesome("twitter")` @groceryheist
`r icons::fontawesome("globe")` [https://communitydata.science](https://communitydata.science)
<!-- ### .border[Multiple imputation struggles with discrete variables] -->
<!-- In my experiments I've found that the 2SLS+GMM method works well with a broader range of data types. -->
<!-- To illustrate, Example 3 is the same as Example 2, but with $x$ and $w$ as discrete variables. -->
<!-- Practically speaking, a continuous "score" $w$ is often available, and my opinion is that usually this is better + more informative than model predictions in all cases. Continuous validation data may be more difficult to obtain, but it is often possible using techniques like pairwise comparison. -->
<!-- layout:false -->
<!-- ### .border[Example 3: Estimates of the effect of x ] -->
<!-- .center[ -->
<!-- ```{r echo=FALSE, message=FALSE, warning=FALSE, result='asis', dev='svg', fig.width=8, fig.asp=.625,cache=F} -->
<!-- #plot.df <- -->
<!-- plot.df <- plot.df.example.2[,':='(method=factor(method,levels=c("Naive","Multiple imputation", "Multiple imputation (Classifier features unobserved)","Regression Calibration","2SLS+gmm","Feasible"),ordered=T), -->
<!-- N=factor(N), -->
<!-- m=factor(m))] -->
<!-- plot.df <- plot.df[(variable=='x') & (m != 1000) & (m!=500) & (N!=5000) & (N!=10000) & !is.na(p.true.in.ci) & (method!="Multiple imputation (Classifier features unobserved)")] -->
<!-- p <- ggplot(plot.df, aes(y=mean.est, ymax=mean.est + var.est/2, ymin=mean.est-var.est/2, x=method)) -->
<!-- p <- p + geom_hline(aes(yintercept=0.2),linetype=2) -->
<!-- p <- p + geom_pointrange() + facet_grid(m~N,as.table=F) + scale_x_discrete(labels=label_wrap_gen(4)) -->
<!-- print(p) -->
<!-- # get gtable object -->
<!-- .large[.left [![](images/example_2_dag.png)]] -->
<!-- There are two general ways that using a predictive model can introduce bias: *attenuation* and *confounding.* -->
<!-- Confounding can be broken down into 4 types: -->
<!-- .right[Confounding on $X$ by observed variables -->
<!-- Confounding on $Y$ by observed variables -->
<!-- ] -->
<!-- .left[Confounding on $X$ by *un*observed variables -->
<!-- Confounding on $Y$ by *un*observed variables -->
<!-- ] -->
<!-- Attenuation and the top-right column can be dealt with relative ease using a few different methods. -->
<!-- The bottom-left column can be addressed, but so far I haven't found a magic bullet. -->
<!-- The left column is pretty much a hopeless situation. -->


@@ -1,757 +0,0 @@
<!DOCTYPE html>
<html lang="" xml:lang="">
<head>
<title>How good of a model do you need? Accounting for classification errors in machine assisted content analysis.</title>
<meta charset="utf-8" />
<meta name="author" content="Nathan TeBlunthuis" />
<script src="libs/header-attrs-2.14/header-attrs.js"></script>
<link href="libs/remark-css-0.0.1/default.css" rel="stylesheet" />
<link rel="stylesheet" href="my-theme.css" type="text/css" />
<link rel="stylesheet" href="fontawesome.min.css" type="text/css" />
</head>
<body>
<textarea id="source">
class: center, middle, narrow
&lt;script type='javascript'&gt;
window.MathJax = {
loader: {load: ['[tex]/xcolor']},
tex: {packages: {'[+]': ['xcolor']}}
};
&lt;/script&gt;
&lt;div class="my-header"&gt;&lt;/div&gt;
### .title-heading[Unlocking the power of big data: The importance of measurement error in machine assisted content analysis]
## Nathan TeBlunthuis
&lt;img src="images/nu_logo.png" height="170px" style="padding:21px"/&gt; &lt;img src="images/uw_logo.png" height="170px" style="padding:21px"/&gt; &lt;img src="images/cdsc_logo.png" height="170px" style="padding:21px"/&gt;
nathan.teblunthuis@northwestern.edu
[https://teblunthuis.cc](https://teblunthuis.cc)
???
This talk will be me presenting my "lab notebook" and not a polished research talk. Maybe it would be a good week of a graduate seminar? In sum, machine assisted content analysis has unique limitations and threats to validity that I wanted to understand better. I've learned how the noise introduced by predictive models can result in misleading statistical inferences, but that a sample of human-labeled validation data can often be used to account for this noise and obtain accurate inferences in the end. Statistical knowledge of this problem and computational tools for addressing it are still in development. My goals for this presentation are to start sharing this information with the community and hopefully to stimulate us to work on extending existing approaches or using them in our work.
This is going to be a boring talk about some *very* technical material. If you're not that interested, please return to your hackathon. Please interrupt me if I'm going too fast for you or if you don't understand something. I will try to move quickly in the interests of those wishing to wrap up their hackathon projects. I will also ask you to show hands once or twice to see whether you are already familiar with some concepts that it might be expedient to skip.
---
class:center, middle, inverse
## Machine assisted content analysis (MACA)
???
I'm going to start by defining a study design that is increasingly common, especially in Communication and Political Science, but also across the social sciences and beyond. I call it *machine assisted content analysis* (MACA).
---
&lt;div class="my-header"&gt;&lt;/div&gt;
### .border[Machine assisted content analysis (MACA) uses machine learning for scientific measurement.]
.emph[Content analysis:] Statistical analysis of variables measured by human labeling ("coding") of content. This might be simple categorical labels, or maybe more advanced annotations.
--
*Downside:* Human labeling is *a lot* of work.
--
.emph[Machine assisted content analysis:] Use a *predictive algorithm* (often trained on human-made labels) to measure variables for use in a downstream *primary analysis.*
--
*Downside:* Algorithms can be *biased* and *inaccurate* in ways that could invalidate the statistical analysis.
???
A machine assisted content analysis can be part of a more complex or more powerful study design (e.g., an experiment, time series analysis &amp;c).
---
&lt;!-- &lt;div class="my-header"&gt;&lt;/div&gt; --&gt;
&lt;!-- ### .border[Hypothetical Example: Predicting Racial Harassment in Social Media Comments] --&gt;
---
class:large
&lt;div class="my-header"&gt;&lt;/div&gt;
### .border[How can MACA go wrong?]
Algorithms can be *biased* and *error prone* (*noisy*).
--
Predictor bias is a potentially difficult problem that requires causal inference methods. I'll focus on *noise* for now.
--
Noise in the predictive model introduces bias in the primary analysis.
--
.indent[We can reduce and sometimes even *eliminate* this bias introduced by noise.]
---
layout:true
&lt;div class="my-header"&gt;&lt;/div&gt;
### .border[Example 1: An unbiased, but noisy classifier]
.large[.left-column[![](images/example_1_dag.png)]]
???
Please show hands if you are familiar with causal graphs or Bayesian networks. Should I explain what this diagram means?
---
.right-column[
`\(x\)` is *partly observed* because we have *validation data* `\(x^*\)`.
]
---
.right-column[
`\(x\)` is *partly observed* because we have *validation data* `\(x^*\)`.
`\(k\)` are the *features* used by the *predictive model* `\(g(k)\)`.
]
---
.right-column[
`\(x\)` is *partly observed* because we have *validation data* `\(x^*\)`.
`\(k\)` are the *features* used by the *predictive model* `\(g(k)\)`.
The predictions `\(w\)` are a *proxy variable* `\(g(k) = \hat{x} = w\)`.
]
---
.right-column[
`\(x\)` is *partly observed* because we have *validation data* `\(x^*\)`.
`\(k\)` are the *features* used by the *predictive model* `\(g(k)\)`.
The predictions `\(w\)` are a *proxy variable* `\(g(k) = \hat{x} = w\)`.
`\(x = w + \xi\)` because the predictive model makes errors.
]
---
layout:true
&lt;div class="my-header"&gt;&lt;/div&gt;
### .border[Noise in a *covariate* creates *attenuation bias*.]
.large[.left-column[![](images/example_1_dag.png)]]
---
.right-column[
We want to estimate, `\(y = Bx + \varepsilon\)`, but we estimate `\(y = Bw + \varepsilon\)` instead.
`\(x = w + \xi\)` because the predictive model makes errors.
]
---
.right-column[
We want to estimate, `\(y = Bx + \varepsilon\)`, but we estimate `\(y = Bw + \varepsilon\)` instead.
`\(x = w + \xi\)` because the predictive model makes errors.
Assume `\(g(k)\)` is *unbiased* so `\(E(\xi)=0\)`. Also assume error is *nondifferential* so `\(E(\xi y)=0\)`:
]
---
.right-column[
We want to estimate, `\(y = Bx + \varepsilon\)`, but we estimate `\(y = Bw + \varepsilon\)` instead.
`\(x = w + \xi\)` because the predictive model makes errors.
Assume `\(g(k)\)` is *unbiased* so `\(E(\xi)=0\)`. Also assume error is *nondifferential* so `\(E(\xi y)=0\)`:
`$$\widehat{B_w}^{ols}=\frac{\sum^n_{j=1}{(x_j + \xi_j - \overline{(x + \xi)})}(y_j - \bar{y})}{\sum_{j=1}^n{(x_j + \xi_j - \overline{(x+\xi)})^2}} = \frac{\sum^n_{j=1}{(x_j - \bar{x})(y_j - \bar{y})}}{\sum_{j=1}^n{(x_j + \xi_j - \bar{x}){^2}}}$$`
]
---
.right-column[
We want to estimate, `\(y = Bx + \varepsilon\)`, but we estimate `\(y = Bw + \varepsilon\)` instead.
`\(x = w + \xi\)` because the predictive model makes errors.
Assume `\(g(k)\)` is *unbiased* so `\(E(\xi)=0\)`. Also assume error is *nondifferential* so `\(E(\xi y)=0\)`:
`$$\widehat{B_w}^{ols}=\frac{\sum^n_{j=1}{(x_j + \xi_j - \overline{(x + \xi)})}(y_j - \bar{y})}{\sum_{j=1}^n{(x_j + \xi_j - \overline{(x+\xi)})^2}} = \frac{\sum^n_{j=1}{(x_j - \bar{x})(y_j - \bar{y})}}{\sum_{j=1}^n{(x_j + \color{red}{\xi_j} - \bar{x})\color{red}{^2}}}$$`
In this scenario, it's clear that `\(\widehat{B_w}^{ols} &lt; B_x\)`.
]
???
Please raise your hands if you're familiar with attenuation bias. I expect that it's covered in some graduate stats classes, but not universally.
---
class:large
layout:false
&lt;div class="my-header"&gt;&lt;/div&gt;
### .border[Beyond attenuation bias]
.larger[Measurement error can threaten validity because:]
- Attenuation bias *spreads* (e.g., to marginal effects as illustrated later).
--
- Measurement error can be *differential*: not distributed evenly and possibly correlated with `\(x\)`, `\(y\)`, or `\(\varepsilon\)`.
--
- *Bias can be away from 0* in GLMs and nonlinear models or if measurement error is differential.
--
- *Confounding* if the *predictive model is biased*, introducing a correlation between the measurement error and the residuals `\((E[\xi\varepsilon] \ne 0)\)`.
---
class:large
layout:false
&lt;div class="my-header"&gt;&lt;/div&gt;
### .border[Correcting measurement error]
There's a vast literature in statistics on measurement error. Mostly about noise you'd find in sensors. Lots of ideas. No magic bullets.
--
I'm going to briefly cover 3 different approaches: *multiple imputation*, *regression calibration* and *2SLS+GMM*.
--
These all depend on *validation data*. I'm going to ignore where this comes from, but assume it's a random sample of the hypothesis testing dataset.
--
You can *and should* use it to improve your statistical estimates.
---
&lt;div class="my-header"&gt;&lt;/div&gt;
### .border[Multiple Imputation (MI) treats Measurement Error as a Missing Data Problem]
1. Use validation data to estimate `\(f(x|w,y)\)`, a probabilistic model of `\(x\)`.
--
2. *Sample* `\(m\)` datasets from `\(\widehat{f(x|w,y)}\)`.
--
3. Run your analysis on each of the `\(m\)` datasets.
--
4. Average the results from the `\(m\)` analyses using Rubin's rules.
--
.e[Advantages:] *Very flexible!* Can sometimes work even if the predictor `\(g(k)\)` is biased. Good R packages (**`{Amelia}`**, `{mi}`, `{mice}`, `{brms}`).
--
.e[Disadvantages:] Results depend on the quality of `\(\widehat{f(x|w,y)}\)`; may require more validation data; computationally expensive, statistically inefficient, and doesn't seem to benefit much from larger datasets.
---
### .border[Regression calibration directly adjusts for attenuation bias.]
1. Use validation data to estimate the errors `\(\hat{\xi}\)`.
--
2. Use `\(\hat{\xi}\)` to correct the OLS estimate.
--
3. Correct the standard errors using MLE or bootstrapping.
--
.e[Advantages:] Simple, fast.
--
.e[Disadvantages:] Limited to OLS models. Requires an unbiased predictor `\(g(k)\)`. R support (`{mecor}` R package) is pretty new.
---
layout:true
### .border[2SLS+GMM is designed for this specific problem]
.left-column[![](images/Fong_Taylor.png)]
*Regression calibration with a trick.*
---
.right-column[
1. Estimate `\(x = w + \xi\)` to obtain `\(\hat{x}\)`. (First-stage LS).
]
---
.right-column[
1. Estimate `\(x = w + \xi\)` to obtain `\(\hat{x}\)`. (First-stage LS).
2. Estimate `\(y = B^{2sls}\hat{x} + \varepsilon^{2sls}\)`. (Second-stage LS / regression calibration).
]
---
.right-column[
1. Estimate `\(x = w + \xi\)` to obtain `\(\hat{x}\)`. (First-stage LS).
2. Estimate `\(y = B^{2sls}\hat{x} + \varepsilon^{2sls}\)`. (Second-stage LS / regression calibration).
3. Estimate `\(y = B^{val}x^* + \varepsilon^{val}\)`. (Validation dataset model).
]
---
.right-column[
1. Estimate `\(x = w + \xi\)` to obtain `\(\hat{x}\)`. (First-stage LS).
2. Estimate `\(y = B^{2sls}\hat{x} + \varepsilon^{2sls}\)`. (Second-stage LS / regression calibration).
3. Estimate `\(y = B^{val}x^* + \varepsilon^{val}\)`. (Validation dataset model).
4. Combine `\(B^{val}\)` and `\(B^{2sls}\)` using the generalized method of moments (GMM).
]
---
.right-column[
1. Estimate `\(x = w + \xi\)` to obtain `\(\hat{x}\)`. (First-stage LS).
2. Estimate `\(y = B^{2sls}\hat{x} + \varepsilon^{2sls}\)`. (Second-stage LS / regression calibration).
3. Estimate `\(y = B^{val}x^* + \varepsilon^{val}\)`. (Validation dataset model).
4. Combine `\(B^{val}\)` and `\(B^{2sls}\)` using the generalized method of moments (GMM).
Advantages: Accurate. Sometimes robust even if the predictor `\(g(k)\)` is biased. In theory, flexible to any model that can be fit using GMM.
]
---
.right-column[
1. Estimate `\(x = w + \xi\)` to obtain `\(\hat{x}\)`. (First-stage LS).
2. Estimate `\(y = B^{2sls}\hat{x} + \varepsilon^{2sls}\)`. (Second-stage LS / regression calibration).
3. Estimate `\(y = B^{val}x^* + \varepsilon^{val}\)`. (Validation dataset model).
4. Combine `\(B^{val}\)` and `\(B^{2sls}\)` using the generalized method of moments (GMM).
Advantages: Accurate. Sometimes robust even if the predictor `\(g(k)\)` is biased. In theory, flexible to any model that can be fit using GMM.
Disadvantages: Implementation (`{predictionError}`) is new. API is cumbersome and only supports linear models. Not robust if `\(E(w\varepsilon) \ne 0\)`. GMM may be unfamiliar to audiences.
]
---
layout:false
### .border[Testing attenuation bias correction]
&lt;div class="my-header"&gt;&lt;/div&gt;
I've run simulations to test these approaches in several scenarios.
The model is not very good: about 70% accurate.
Most plausible scenario:
`\(y\)` is continuous and normal-ish.
--
`\(x\)` is binary (human labels) `\(P(x)=0.5\)`.
--
`\(w\)` is the *continuous predictor* (e.g., probability) output of `\(g(k)\)` (not binary predictions).
--
If `\(w\)` is binary, most methods struggle, but regression calibration and 2SLS+GMM can do okay.
---
layout:false
### .border[Example 1: estimator of the effect of x]
.right-column[
![](ica_hackathon_2022_files/figure-html/unnamed-chunk-2-1.svg)&lt;!-- --&gt;
]
.left-column[
All methods work in this scenario
Multiple imputation is inefficient.
]
---
### .border[What about bias?]
.left-column[
.large[![](images/example_2_dag.png)]
]
.right-column[
A few notes on this scenario.
`\(B_x = 0.2\)`, `\(B_g=-0.2\)` and `\(sd(\varepsilon)=3\)`. So the signal-to-noise ratio is high.
`\(r\)` can be conceived of as a missing feature in the predictive model `\(g(k)\)` that is also correlated with `\(y\)`.
For example `\(r\)` might be the *race* of a commenter, `\(x\)` could be *racial harassment*, `\(y\)` whether the commenter gets banned, and `\(k\)` only has textual features but human coders can see user profiles to learn `\(r\)`.
]
---
layout:false
### .border[Example 2: Estimates of the effect of x ]
.center[
![](ica_hackathon_2022_files/figure-html/unnamed-chunk-3-1.svg)&lt;!-- --&gt;
]
---
layout:false
### .border[Example 2: Estimates of the effect of r]
.center[
![](ica_hackathon_2022_files/figure-html/unnamed-chunk-4-1.svg)&lt;!-- --&gt;
]
---
layout:false
class:large
###.border[Takeaways from example 2]
Bias in the predictive model creates bias in hypothesis tests.
--
Bias can be corrected *in this case*.
--
The next scenario has bias that's more tricky.
--
Multiple imputation helps, but doesn't fully correct the bias.
---
layout:false
### .border[When will GMM+2SLS fail?]
.large[.left-column[![](images/example_3_dag.png)]]
.right-column[The catch with GMM:
.emph[Exclusion restriction:] `\(E[w \varepsilon] = 0\)`.
The restriction is violated if a variable `\(U\)` causes both `\(K\)` and `\(Y\)` and `\(X\)` causes `\(K\)` (not vice versa).
]
???
GMM optimizes a model to a system of equations of which the exclusion restriction is one. So if that assumption isn't true it will be biased.
This is a different assumption than that of OLS or GLM models.
---
layout:false
### .border[Example 3: Estimates of the effect of x]
.center[
![](ica_hackathon_2022_files/figure-html/unnamed-chunk-5-1.svg)&lt;!-- --&gt;
]
---
### .border[Takeaways]
- Attenuation bias can be a big problem with noisy predictors—leading to small and biased estimates.
- For more general hypothesis tests or if the predictor is biased, measurement error can lead to false discovery.
- It's fixable with validation data—you may not need that much and you should already be getting it.
- This means it can be okay to use poor predictors for hypothesis testing.
- The ecosystem is underdeveloped, but a lot of methods have been researched.
- Take advantage of machine learning + big data and get precise estimates when the signal-to-noise ratio is high!
---
layout:false
### .border[Future work: Noise in the *outcome*]
I've been focusing on noise in *covariates.* What if the predictive algorithm is used to measure the *outcome* `\(y\)`?
--
This isn't a problem in the simplest case (linear regression with homoskedastic errors). Noise in `\(y\)` is projected into the error term.
--
Noise in the outcome is still a problem if errors are heteroskedastic and for GLMs / non-linear regression (e.g., logistic regression).
--
Multiple imputation (in theory) could help here. The other methods aren't designed for this case.
--
Solving this problem could be an important methodological contribution with a very broad impact.
---
# .border[Questions?]
Links to slides: [html](https://teblunthuis.cc/~nathante/slides/ecological_adaptation_ica_2022.html) [pdf](https://teblunthuis.cc/~nathante/slides/ecological_adaptation_ica_2022.pdf)
Link to a messy git repository:
&lt;i class="fa fa-envelope" aria-hidden='true'&gt;&lt;/i&gt; nathan.teblunthuis@northwestern.edu
&lt;i class="fa fa-twitter" aria-hidden='true'&gt;&lt;/i&gt; @groceryheist
&lt;i class="fa fa-globe" aria-hidden='true'&gt;&lt;/i&gt; [https://communitydata.science](https://communitydata.science)
&lt;!-- ### .border[Multiple imputation struggles with discrete variables] --&gt;
&lt;!-- In my experiments I've found that the 2SLS+GMM method works well with a broader range of data types. --&gt;
&lt;!-- To illustrate, Example 3 is the same as Example 2, but with `\(x\)` and `\(w\)` as discrete variables. --&gt;
&lt;!-- Practically speaking, a continuous "score" `\(w\)` is often available, and my opinion is that it is usually better and more informative than binary model predictions. Continuous validation data may be more difficult to obtain, but it is often possible using techniques like pairwise comparison. --&gt;
&lt;!-- layout:false --&gt;
&lt;!-- ### .border[Example 3: Estimates of the effect of x ] --&gt;
&lt;!-- .center[ --&gt;
&lt;!-- ```{r echo=FALSE, message=FALSE, warning=FALSE, result='asis', dev='svg', fig.width=8, fig.asp=.625,cache=F} --&gt;
&lt;!-- #plot.df &lt;- --&gt;
&lt;!-- plot.df &lt;- plot.df.example.2[,':='(method=factor(method,levels=c("Naive","Multiple imputation", "Multiple imputation (Classifier features unobserved)","Regression Calibration","2SLS+gmm","Feasible"),ordered=T), --&gt;
&lt;!-- N=factor(N), --&gt;
&lt;!-- m=factor(m))] --&gt;
&lt;!-- plot.df &lt;- plot.df[(variable=='x') &amp; (m != 1000) &amp; (m!=500) &amp; (N!=5000) &amp; (N!=10000) &amp; !is.na(p.true.in.ci) &amp; (method!="Multiple imputation (Classifier features unobserved)")] --&gt;
&lt;!-- p &lt;- ggplot(plot.df, aes(y=mean.est, ymax=mean.est + var.est/2, ymin=mean.est-var.est/2, x=method)) --&gt;
&lt;!-- p &lt;- p + geom_hline(aes(yintercept=0.2),linetype=2) --&gt;
&lt;!-- p &lt;- p + geom_pointrange() + facet_grid(m~N,as.table=F) + scale_x_discrete(labels=label_wrap_gen(4)) --&gt;
&lt;!-- print(p) --&gt;
&lt;!-- # get gtable object --&gt;
&lt;!-- .large[.left [![](images/example_2_dag.png)]] --&gt;
&lt;!-- There are two general ways in which using a predictive model can introduce bias: *attenuation* and *confounding.* --&gt;
&lt;!-- Confounding can be broken down into 4 types: --&gt;
&lt;!-- .right[Confounding on `\(X\)` by observed variables --&gt;
&lt;!-- Confounding on `\(Y\)` by observed variables --&gt;
&lt;!-- ] --&gt;
&lt;!-- .left[Confounding on `\(X\)` by *un*observed variables --&gt;
&lt;!-- Confounding on `\(Y\)` by *un*observed variables --&gt;
&lt;!-- ] --&gt;
&lt;!-- Attenuation and the top-right column can be dealt with relatively easily using a few different methods. --&gt;
&lt;!-- The bottom-left column can be addressed, but so far I haven't found a magic bullet. --&gt;
&lt;!-- The left column is pretty much a hopeless situation. --&gt;
</textarea>
<style data-target="print-only">@media screen {.remark-slide-container{display:block;}.remark-slide-scaler{box-shadow:none;}}</style>
<script src="libs/remark-latest.min.js"></script>
<script>var slideshow = remark.create({
"highlightStyle": "github",
"ratio": "16:9",
"countIncrementalSlides": true,
"slideNumberFormat": "<div class=\"progress-bar-container\">\n <div class=\"progress-bar\" style=\"width: calc(%current% / %total% * 100%);\">\n </div>\n</div>\n"
});
if (window.HTMLWidgets) slideshow.on('afterShowSlide', function (slide) {
window.dispatchEvent(new Event('resize'));
});
(function(d) {
var s = d.createElement("style"), r = d.querySelector(".remark-slide-scaler");
if (!r) return;
s.type = "text/css"; s.innerHTML = "@page {size: " + r.style.width + " " + r.style.height +"; }";
d.head.appendChild(s);
})(document);
(function(d) {
var el = d.getElementsByClassName("remark-slides-area");
if (!el) return;
var slide, slides = slideshow.getSlides(), els = el[0].children;
for (var i = 1; i < slides.length; i++) {
slide = slides[i];
if (slide.properties.continued === "true" || slide.properties.count === "false") {
els[i - 1].className += ' has-continuation';
}
}
var s = d.createElement("style");
s.type = "text/css"; s.innerHTML = "@media print { .has-continuation { display: none; } }";
d.head.appendChild(s);
})(document);
// delete the temporary CSS (for displaying all slides initially) when the user
// starts to view slides
(function() {
var deleted = false;
slideshow.on('beforeShowSlide', function(slide) {
if (deleted) return;
var sheets = document.styleSheets, node;
for (var i = 0; i < sheets.length; i++) {
node = sheets[i].ownerNode;
if (node.dataset["target"] !== "print-only") continue;
node.parentNode.removeChild(node);
}
deleted = true;
});
})();
// add `data-at-shortcutkeys` attribute to <body> to resolve conflicts with JAWS
// screen reader (see PR #262)
(function(d) {
let res = {};
d.querySelectorAll('.remark-help-content table tr').forEach(tr => {
const t = tr.querySelector('td:nth-child(2)').innerText;
tr.querySelectorAll('td:first-child .key').forEach(key => {
const k = key.innerText;
if (/^[a-z]$/.test(k)) res[k] = t; // must be a single letter (key)
});
});
d.body.setAttribute('data-at-shortcutkeys', JSON.stringify(res));
})(document);
(function() {
"use strict"
// Replace <script> tags in slides area to make them executable
var scripts = document.querySelectorAll(
'.remark-slides-area .remark-slide-container script'
);
if (!scripts.length) return;
for (var i = 0; i < scripts.length; i++) {
var s = document.createElement('script');
var code = document.createTextNode(scripts[i].textContent);
s.appendChild(code);
var scriptAttrs = scripts[i].attributes;
for (var j = 0; j < scriptAttrs.length; j++) {
s.setAttribute(scriptAttrs[j].name, scriptAttrs[j].value);
}
scripts[i].parentElement.replaceChild(s, scripts[i]);
}
})();
(function() {
var links = document.getElementsByTagName('a');
for (var i = 0; i < links.length; i++) {
if (/^(https?:)?\/\//.test(links[i].getAttribute('href'))) {
links[i].target = '_blank';
}
}
})();</script>
<script>
slideshow._releaseMath = function(el) {
var i, text, code, codes = el.getElementsByTagName('code');
for (i = 0; i < codes.length;) {
code = codes[i];
if (code.parentNode.tagName !== 'PRE' && code.childElementCount === 0) {
text = code.textContent;
if (/^\\\((.|\s)+\\\)$/.test(text) || /^\\\[(.|\s)+\\\]$/.test(text) ||
/^\$\$(.|\s)+\$\$$/.test(text) ||
/^\\begin\{([^}]+)\}(.|\s)+\\end\{[^}]+\}$/.test(text)) {
code.outerHTML = code.innerHTML; // remove <code></code>
continue;
}
}
i++;
}
};
slideshow._releaseMath(document);
</script>
<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
(function () {
var script = document.createElement('script');
script.type = 'text/javascript';
script.src = 'https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-MML-AM_CHTML';
if (location.protocol !== 'file:' && /^https?:/.test(script.src))
script.src = script.src.replace(/^https?:/, '');
document.getElementsByTagName('head')[0].appendChild(script);
})();
</script>
</body>
</html>

Binary file not shown.


Binary file not shown.


View File

@ -1 +0,0 @@
../../../charts/example_1_dag/example_1_dag.png

View File

@ -1 +0,0 @@
../../../charts/example_1_dag/example_1_dag.svg

View File

@ -1 +0,0 @@
../../../charts/example_2_dag/example_2_dag.png

View File

@ -1 +0,0 @@
../../../charts/example_2_dag/example_2_dag.svg

View File

@ -1 +0,0 @@
../../../charts/example_3_dag/example_3_dag.png

View File

@ -1 +0,0 @@
../../../charts/example_3_dag/example_3_dag.svg

Binary file not shown.


Binary file not shown.


Binary file not shown.


Binary file not shown.


View File

@ -1,12 +0,0 @@
// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
// be compatible with the behavior of Pandoc < 2.8).
document.addEventListener('DOMContentLoaded', function(e) {
var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
var i, h, a;
for (i = 0; i < hs.length; i++) {
h = hs[i];
if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6
a = h.attributes;
while (a.length > 0) h.removeAttribute(a[0].name);
}
});

View File

@ -1,12 +0,0 @@
// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
// be compatible with the behavior of Pandoc < 2.8).
document.addEventListener('DOMContentLoaded', function(e) {
var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
var i, h, a;
for (i = 0; i < hs.length; i++) {
h = hs[i];
if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6
a = h.attributes;
while (a.length > 0) h.removeAttribute(a[0].name);
}
});

File diff suppressed because one or more lines are too long

View File

@ -1,72 +0,0 @@
a, a > code {
color: rgb(249, 38, 114);
text-decoration: none;
}
.footnote {
position: absolute;
bottom: 3em;
padding-right: 4em;
font-size: 90%;
}
.remark-code-line-highlighted { background-color: #ffff88; }
.inverse {
background-color: #272822;
color: #d6d6d6;
text-shadow: 0 0 20px #333;
}
.inverse h1, .inverse h2, .inverse h3 {
color: #f3f3f3;
}
/* Two-column layout */
.left-column {
color: #777;
width: 20%;
height: 92%;
float: left;
}
.left-column h2:last-of-type, .left-column h3:last-child {
color: #000;
}
.right-column {
width: 75%;
float: right;
padding-top: 1em;
}
.pull-left {
float: left;
width: 47%;
}
.pull-right {
float: right;
width: 47%;
}
.pull-right + * {
clear: both;
}
img, video, iframe {
max-width: 100%;
}
blockquote {
border-left: solid 5px lightgray;
padding-left: 1em;
}
.remark-slide table {
margin: auto;
border-top: 1px solid #666;
border-bottom: 1px solid #666;
}
.remark-slide table thead th { border-bottom: 1px solid #ddd; }
th, td { padding: 5px; }
.remark-slide thead, .remark-slide tfoot, .remark-slide tr:nth-child(even) { background: #eee }
@page { margin: 0; }
@media print {
.remark-slide-scaler {
width: 100% !important;
height: 100% !important;
transform: scale(1) !important;
top: 0 !important;
left: 0 !important;
}
}

File diff suppressed because one or more lines are too long

View File

@ -1 +0,0 @@
/home/nathante/mathjax

View File

@ -1,145 +0,0 @@
.huge { font-size: 170% }
.large { font-size: 140% }
.small { font-size: 70% }
.tiny{font-size: 40%}
/* .inverse { */
/* background-color: #272822; */
/* color: #d6d6d6; */
/* text-shadow: 0 0 20px #333; */
/* } */
.header-image{
width:650px;
display:inline-block;
}
.large img{
width:250px;
}
.emph{
color:#4e2a84;
font-weight: bolder;
}
.mygreen{
color:#2eab20;
}
.myyellow{
color:#AB9d20;
}
.myblue{
color:#2073AB;
}
.myred{
color:#AB202E;
}
.cite{
font-weight: lighter;
font-size:60%;
font-family:"times", "Helvetica","serif";
position: fixed;
bottom: 16px;
}
.left-column {
color: #777;
width: 40%;
height: 100%;
float: left;
}
.left-column h2:last-of-type, .left-column h3:last-child {
color: #000;
}
.right-column {
width: 60%;
float: right;
padding-top: 1em;
}
.hypo-mark img{
width:120px;
position: fixed;
bottom: 545px;
left: 1050px;
}
.hypo-mark-1 img{
}
.hypo-mark-2 img{
bottom:480px;
}
.hypo-mark-3 img{
bottom:480px;
left:1050px;
}
.remark-slide-number {
position: inherit;
}
.remark-slide-number .progress-bar-container {
position: absolute;
bottom: 0;
height: 4px;
display: block;
left: 0;
right: 0;
}
a, a > code{
color:#4e2a84;
text-decoration:none;
}
.remark-slide-number .progress-bar {
height: 100%;
background-color: #4e2a84;
}
.border{
border-bottom: #4e2a84 solid 0.7mm;
padding: 3px;
display:inline-block;
}
div.my-header {
background-color: #4e2a84;
background: -webkit-linear-gradient(left, #604982, #4E2A84 30%, #5820AB 70%, #5820AB);
position: fixed;
top: 0px;
left: 0px;
height: 26px;
width: 100%;
text-align: left;
}
.inverse {
background-color: #322e37;
color: #FCFBFD;
text-shadow: 0 0 20px #333;
}
.inverse h1, .inverse h2, .inverse h3, .inverse h4{
color: #FCFBFD;
text-shadow: 0 0 20px #333;
}
.remark-slide thead, .remark-slide tfoot, .remark-slide tr:nth-child(2n) {
background: #d7c9ec;
}
.narrow{
padding-left: 150px;
padding-right: 150px;
}

View File

@ -1,793 +0,0 @@
<!DOCTYPE html>
<html$if(lang)$ lang="$lang$"$endif$$if(dir)$ dir="$dir$"$endif$>
<head>
<meta charset="utf-8">
<meta name="generator" content="pandoc">
$for(author-meta)$
<meta name="author" content="$author-meta$" />
$endfor$
$if(date-meta)$
<meta name="dcterms.date" content="$date-meta$" />
$endif$
$if(keywords)$
<meta name="keywords" content="$for(keywords)$$keywords$$sep$, $endfor$">
$endif$
<title>$if(title-prefix)$$title-prefix$ $endif$$pagetitle$</title>
<meta name="apple-mobile-web-app-capable" content="yes">
<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent">
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no, minimal-ui">
<link rel="stylesheet" href="$revealjs-url$/css/reveal.css"/>
$if(highlightjs)$
<link rel="stylesheet"
href="$highlightjs$/$if(highlightjs-theme)$$highlightjs-theme$$else$default$endif$.css"
$if(html5)$$else$type="text/css" $endif$/>
<script src="$highlightjs$/highlight.js"></script>
$endif$
$if(highlighting-css)$
<style type="text/css">
$highlighting-css$
</style>
$endif$
$if(theme)$
<link rel="stylesheet" href="$revealjs-url$/css/theme/$theme$.css" id="theme">
$endif$
$if(theme-dark)$
<style type="text/css">
.reveal section img {
background: rgba(255, 255, 255, 0.85);
}
</style>
$endif$
<!-- some tweaks to reveal css -->
<style type="text/css">
.reveal h1 { font-size: 2.0em; }
.reveal h2 { font-size: 1.5em; }
.reveal h3 { font-size: 1.25em; }
.reveal h4 { font-size: 1em; }
.reveal .slides>section,
.reveal .slides>section>section {
padding: 0px 0px;
}
$if(center)$
$else$
.reveal .title {
margin-top: 125px;
margin-bottom: 50px;
}
$endif$
.reveal table {
border-width: 1px;
border-spacing: 2px;
border-style: dotted;
border-color: gray;
border-collapse: collapse;
font-size: 0.7em;
}
.reveal table th {
border-width: 1px;
padding-left: 10px;
padding-right: 25px;
font-weight: bold;
border-style: dotted;
border-color: gray;
}
.reveal table td {
border-width: 1px;
padding-left: 10px;
padding-right: 25px;
border-style: dotted;
border-color: gray;
}
$if(plugin-menu)$
$if(plugin-chalkboard)$
.reveal .slide-menu-button {
left: 105px !important;
}
$endif$
$endif$
</style>
<style type="text/css">code{white-space: pre;}</style>
$if(css)$
$for(css)$
<link rel="stylesheet" href="$css$"/>
$endfor$
$endif$
<!-- Printing and PDF exports -->
<script id="paper-css" type="application/dynamic-css">
/* Default Print Stylesheet Template
by Rob Glazebrook of CSSnewbie.com
Last Updated: June 4, 2008
Feel free (nay, compelled) to edit, append, and
manipulate this file as you see fit. */
@media print {
/* SECTION 1: Set default width, margin, float, and
background. This prevents elements from extending
beyond the edge of the printed page, and prevents
unnecessary background images from printing */
html {
background: #fff;
width: auto;
height: auto;
overflow: visible;
}
body {
background: #fff;
font-size: 20pt;
width: auto;
height: auto;
border: 0;
margin: 0 5%;
padding: 0;
overflow: visible;
float: none !important;
}
/* SECTION 2: Remove any elements not needed in print.
This would include navigation, ads, sidebars, etc. */
.nestedarrow,
.controls,
.fork-reveal,
.share-reveal,
.state-background,
.reveal .progress,
.reveal .backgrounds {
display: none !important;
}
/* SECTION 3: Set body font face, size, and color.
Consider using a serif font for readability. */
body, p, td, li, div {
font-size: 20pt!important;
font-family: Georgia, "Times New Roman", Times, serif !important;
color: #000;
}
/* SECTION 4: Set heading font face, sizes, and color.
Differentiate your headings from your body text.
Perhaps use a large sans-serif for distinction. */
h1,h2,h3,h4,h5,h6 {
color: #000!important;
height: auto;
line-height: normal;
font-family: Georgia, "Times New Roman", Times, serif !important;
text-shadow: 0 0 0 #000 !important;
text-align: left;
letter-spacing: normal;
}
/* Need to reduce the size of the fonts for printing */
h1 { font-size: 28pt !important; }
h2 { font-size: 24pt !important; }
h3 { font-size: 22pt !important; }
h4 { font-size: 22pt !important; font-variant: small-caps; }
h5 { font-size: 21pt !important; }
h6 { font-size: 20pt !important; font-style: italic; }
/* SECTION 5: Make hyperlinks more usable.
Ensure links are underlined, and consider appending
the URL to the end of the link for usability. */
a:link,
a:visited {
color: #000 !important;
font-weight: bold;
text-decoration: underline;
}
/*
.reveal a:link:after,
.reveal a:visited:after {
content: " (" attr(href) ") ";
color: #222 !important;
font-size: 90%;
}
*/
/* SECTION 6: more reveal.js specific additions by @skypanther */
ul, ol, div, p {
visibility: visible;
position: static;
width: auto;
height: auto;
display: block;
overflow: visible;
margin: 0;
text-align: left !important;
}
.reveal pre,
.reveal table {
margin-left: 0;
margin-right: 0;
}
.reveal pre code {
padding: 20px;
border: 1px solid #ddd;
}
.reveal blockquote {
margin: 20px 0;
}
.reveal .slides {
position: static !important;
width: auto !important;
height: auto !important;
left: 0 !important;
top: 0 !important;
margin-left: 0 !important;
margin-top: 0 !important;
padding: 0 !important;
zoom: 1 !important;
overflow: visible !important;
display: block !important;
text-align: left !important;
-webkit-perspective: none;
-moz-perspective: none;
-ms-perspective: none;
perspective: none;
-webkit-perspective-origin: 50% 50%;
-moz-perspective-origin: 50% 50%;
-ms-perspective-origin: 50% 50%;
perspective-origin: 50% 50%;
}
.reveal .slides section {
visibility: visible !important;
position: static !important;
width: auto !important;
height: auto !important;
display: block !important;
overflow: visible !important;
left: 0 !important;
top: 0 !important;
margin-left: 0 !important;
margin-top: 0 !important;
padding: 60px 20px !important;
z-index: auto !important;
opacity: 1 !important;
page-break-after: always !important;
-webkit-transform-style: flat !important;
-moz-transform-style: flat !important;
-ms-transform-style: flat !important;
transform-style: flat !important;
-webkit-transform: none !important;
-moz-transform: none !important;
-ms-transform: none !important;
transform: none !important;
-webkit-transition: none !important;
-moz-transition: none !important;
-ms-transition: none !important;
transition: none !important;
}
.reveal .slides section.stack {
padding: 0 !important;
}
.reveal section:last-of-type {
page-break-after: avoid !important;
}
.reveal section .fragment {
opacity: 1 !important;
visibility: visible !important;
-webkit-transform: none !important;
-moz-transform: none !important;
-ms-transform: none !important;
transform: none !important;
}
.reveal section img {
display: block;
margin: 15px 0px;
background: rgba(255,255,255,1);
border: 1px solid #666;
box-shadow: none;
}
.reveal section small {
font-size: 0.8em;
}
}
</script>
<script id="pdf-css" type="application/dynamic-css">
/**
* This stylesheet is used to print reveal.js
* presentations to PDF.
*
* https://github.com/hakimel/reveal.js#pdf-export
*/
* {
-webkit-print-color-adjust: exact;
}
body {
margin: 0 auto !important;
border: 0;
padding: 0;
float: none !important;
overflow: visible;
}
html {
width: 100%;
height: 100%;
overflow: visible;
}
/* Remove any elements not needed in print. */
.nestedarrow,
.reveal .controls,
.reveal .progress,
.reveal .playback,
.reveal.overview,
.fork-reveal,
.share-reveal,
.state-background {
display: none !important;
}
h1, h2, h3, h4, h5, h6 {
text-shadow: 0 0 0 #000 !important;
}
.reveal pre code {
overflow: hidden !important;
font-family: Courier, 'Courier New', monospace !important;
}
ul, ol, div, p {
visibility: visible;
position: static;
width: auto;
height: auto;
display: block;
overflow: visible;
margin: auto;
}
.reveal {
width: auto !important;
height: auto !important;
overflow: hidden !important;
}
.reveal .slides {
position: static;
width: 100%;
height: auto;
left: auto;
top: auto;
margin: 0 !important;
padding: 0 !important;
overflow: visible;
display: block;
-webkit-perspective: none;
-moz-perspective: none;
-ms-perspective: none;
perspective: none;
-webkit-perspective-origin: 50% 50%; /* there isn't a none/auto value but 50-50 is the default */
-moz-perspective-origin: 50% 50%;
-ms-perspective-origin: 50% 50%;
perspective-origin: 50% 50%;
}
.reveal .slides section {
page-break-after: always !important;
visibility: visible !important;
position: relative !important;
display: block !important;
position: relative !important;
margin: 0 !important;
padding: 0 !important;
box-sizing: border-box !important;
min-height: 1px;
opacity: 1 !important;
-webkit-transform-style: flat !important;
-moz-transform-style: flat !important;
-ms-transform-style: flat !important;
transform-style: flat !important;
-webkit-transform: none !important;
-moz-transform: none !important;
-ms-transform: none !important;
transform: none !important;
}
.reveal section.stack {
margin: 0 !important;
padding: 0 !important;
page-break-after: avoid !important;
height: auto !important;
min-height: auto !important;
}
.reveal img {
box-shadow: none;
}
.reveal .roll {
overflow: visible;
line-height: 1em;
}
/* Slide backgrounds are placed inside of their slide when exporting to PDF */
.reveal section .slide-background {
display: block !important;
position: absolute;
top: 0;
left: 0;
width: 100%;
z-index: -1;
}
/* All elements should be above the slide-background */
.reveal section>* {
position: relative;
z-index: 1;
}
/* Display slide speaker notes when 'showNotes' is enabled */
.reveal .speaker-notes-pdf {
display: block;
width: 100%;
max-height: none;
left: auto;
top: auto;
z-index: 100;
}
/* Display slide numbers when 'slideNumber' is enabled */
.reveal .slide-number-pdf {
display: block;
position: absolute;
font-size: 14px;
}
</script>
<script>
var style = document.createElement( 'style' );
style.type = 'text/css';
var style_script_id = window.location.search.match( /print-pdf/gi ) ? 'pdf-css' : 'paper-css';
var style_script = document.getElementById(style_script_id).text;
style.innerHTML = style_script;
document.getElementsByTagName('head')[0].appendChild(style);
</script>
$for(header-includes)$
$header-includes$
$endfor$
</head>
<body>
$for(include-before)$
$include-before$
$endfor$
<div class="reveal">
<div class="slides">
<!--
$if(title)$
<section>
<h1 class="title">$title$</h1>
$if(subtitle)$
<h1 class="subtitle">$subtitle$</h1>
$endif$
$for(author)$
<h2 class="author">$author$</h2>
$endfor$
$if(date)$
<h3 class="date">$date$</h3>
$endif$
</section>
$endif$
$if(toc)$
<section id="$idprefix$TOC">
$toc$
</section>
$endif$
-->
$body$
</div>
</div>
<script src="$revealjs-url$/lib/js/head.min.js"></script>
<script src="$revealjs-url$/js/reveal.js"></script>
<script>
// Full list of configuration options available at:
// https://github.com/hakimel/reveal.js#configuration
Reveal.initialize({
$if(controls)$
// Display controls in the bottom right corner
controls: $controls$,
$endif$
$if(progress)$
// Display a presentation progress bar
progress: $progress$,
$endif$
$if(slideNumber)$
// Display the page number of the current slide
slideNumber: $slideNumber$,
$endif$
$if(history)$
// Push each slide change to the browser history
history: $history$,
$endif$
$if(keyboard)$
// Enable keyboard shortcuts for navigation
keyboard: $keyboard$,
$endif$
$if(overview)$
// Enable the slide overview mode
overview: $overview$,
$endif$
$if(center)$
// Vertical centering of slides
center: $center$,
$endif$
$if(touch)$
// Enables touch navigation on devices with touch input
touch: $touch$,
$endif$
$if(loop)$
// Loop the presentation
loop: $loop$,
$endif$
$if(rtl)$
// Change the presentation direction to be RTL
rtl: $rtl$,
$endif$
$if(fragments)$
// Turns fragments on and off globally
fragments: $fragments$,
$endif$
$if(embedded)$
// Flags if the presentation is running in an embedded mode,
// i.e. contained within a limited portion of the screen
embedded: $embedded$,
$endif$
$if(help)$
// Flags if we should show a help overlay when the questionmark
// key is pressed
help: $help$,
$endif$
$if(autoSlide)$
// Number of milliseconds between automatically proceeding to the
// next slide, disabled when set to 0, this value can be overwritten
// by using a data-autoslide attribute on your slides
autoSlide: $autoSlide$,
$endif$
$if(autoSlideStoppable)$
// Stop auto-sliding after user input
autoSlideStoppable: $autoSlideStoppable$,
$endif$
$if(mouseWheel)$
// Enable slide navigation via mouse wheel
mouseWheel: $mouseWheel$,
$endif$
$if(hideAddressBar)$
// Hides the address bar on mobile devices
hideAddressBar: $hideAddressBar$,
$endif$
$if(previewLinks)$
// Opens links in an iframe preview overlay
previewLinks: $previewLinks$,
$endif$
$if(transition)$
// Transition style
transition: '$transition$', // none/fade/slide/convex/concave/zoom
$endif$
$if(transitionSpeed)$
// Transition speed
transitionSpeed: '$transitionSpeed$', // default/fast/slow
$endif$
$if(backgroundTransition)$
// Transition style for full page slide backgrounds
backgroundTransition: '$backgroundTransition$', // none/fade/slide/convex/concave/zoom
$endif$
$if(viewDistance)$
// Number of slides away from the current that are visible
viewDistance: $viewDistance$,
$endif$
$if(parallaxBackgroundImage)$
// Parallax background image
parallaxBackgroundImage: '$parallaxBackgroundImage$', // e.g. "'https://s3.amazonaws.com/hakim-static/reveal-js/reveal-parallax-1.jpg'"
$endif$
$if(parallaxBackgroundSize)$
// Parallax background size
parallaxBackgroundSize: '$parallaxBackgroundSize$', // CSS syntax, e.g. "2100px 900px"
$endif$
$if(parallaxBackgroundHorizontal)$
// Amount to move parallax background (horizontal and vertical) on slide change
// Number, e.g. 100
parallaxBackgroundHorizontal: '$parallaxBackgroundHorizontal$',
$endif$
$if(parallaxBackgroundVertical)$
parallaxBackgroundVertical: '$parallaxBackgroundVertical$',
$endif$
$if(width)$
// The "normal" size of the presentation, aspect ratio will be preserved
// when the presentation is scaled to fit different resolutions. Can be
// specified using percentage units.
width: $width$,
$endif$
$if(height)$
height: $height$,
$endif$
$if(margin)$
// Factor of the display size that should remain empty around the content
margin: $margin$,
$endif$
$if(minScale)$
// Bounds for smallest/largest possible scale to apply to content
minScale: $minScale$,
$endif$
$if(maxScale)$
maxScale: $maxScale$,
$endif$
$if(plugin-menu)$
menu: {
$if(menu-side)$
side: $menu-side$,
$endif$
$if(menu-numbers)$
numbers: $menu-numbers$,
$endif$
$if(menu-titleSelector)$
titleSelector: $menu-titleSelector$,
$endif$
$if(menu-hideMissingTitles)$
hideMissingTitles: $menu-hideMissingTitles$,
$endif$
$if(menu-markers)$
markers: $menu-markers$,
$endif$
$if(menu-openButton)$
openButton: $menu-openButton$,
$endif$
$if(menu-openSlideNumber)$
openSlideNumber: $menu-openSlideNumber$,
$endif$
$if(menu-keyboard)$
keyboard: $menu-keyboard$,
$endif$
custom: false,
themes: false,
transitions: false
},
$endif$
$if(plugin-chalkboard)$
chalkboard: {
$if(chalkboard-src)$
src: $chalkboard-src$,
$endif$
$if(chalkboard-readOnly)$
readOnly: $chalkboard-readOnly$,
$endif$
$if(chalkboard-toggleNotesButton)$
toggleNotesButton: $chalkboard-toggleNotesButton$,
$endif$
$if(chalkboard-toggleChalkboardButton)$
toggleChalkboardButton: $chalkboard-toggleChalkboardButton$,
$endif$
$if(chalkboard-transition)$
transition: $chalkboard-transition$,
$endif$
$if(chalkboard-theme)$
theme: $chalkboard-theme$,
$endif$
$if(chalkboard-color)$
color: $chalkboard-color$,
$endif$
$if(chalkboard-background)$
background: $chalkboard-background$,
$endif$
$if(chalkboard-pen)$
pen: $chalkboard-pen$,
$endif$
},
keyboard: {
67: function() { RevealChalkboard.toggleNotesCanvas() }, // toggle notes canvas when 'c' is pressed
66: function() { RevealChalkboard.toggleChalkboard() }, // toggle chalkboard when 'b' is pressed
46: function() { RevealChalkboard.clear() }, // clear chalkboard when 'DEL' is pressed
8: function() { RevealChalkboard.reset() }, // reset chalkboard data on current slide when 'BACKSPACE' is pressed
68: function() { RevealChalkboard.download() }, // downlad recorded chalkboard drawing when 'd' is pressed
},
$endif$
// Optional reveal.js plugins
dependencies: [
$if(plugin-notes)$
{ src: '$revealjs-url$/plugin/notes/notes.js', async: true },
$endif$
$if(plugin-search)$
{ src: '$revealjs-url$/plugin/search/search.js', async: true },
$endif$
$if(plugin-zoom)$
{ src: '$revealjs-url$/plugin/zoom-js/zoom.js', async: true },
$endif$
$if(plugin-chalkboard)$
{ src: '$revealjs-url$/plugin/chalkboard/chalkboard.js', async: true },
$endif$
$if(plugin-menu)$
{ src: '$revealjs-url$/plugin/menu/menu.js', async: true },
$endif$
]
});
</script>
$if(mathjax-url)$
<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
(function () {
var script = document.createElement("script");
script.type = "text/javascript";
script.src = "$mathjax-url$";
document.getElementsByTagName("head")[0].appendChild(script);
})();
</script>
$endif$
<script>
(function() {
if (window.jQuery) {
Reveal.addEventListener( 'slidechanged', function(event) {
window.jQuery(event.previousSlide).trigger('hidden');
window.jQuery(event.currentSlide).trigger('shown');
});
}
})();
</script>
$for(include-after)$
$include-after$
$endfor$
</body>
</html>