115 lines
4.9 KiB
R
115 lines
4.9 KiB
R
library(scales)
|
|
library(data.table)
|
|
library(ggplot2)
|
|
# Stored results for the example where the classified variable is a predictor
# (IV), plus the sample proportion and count saved alongside them.
# `[[..., exact = FALSE]]` is the same lookup that `$` performs on a list.
iv.example <- readRDS("iv_perspective_example.RDS")
iv.sample.prop <- iv.example[[
  "cc_ex_tox.likes.race_disclosed.medsampsample.prop",
  exact = FALSE
]]
iv.sample.count <- iv.example[[
  "cc_ex_tox.likes.race_disclosed.medsampsample.count",
  exact = FALSE
]]

# Same for the example where the classified variable is the outcome (DV).
# NOTE(review): these keys carry a ".largesamp" prefix while the IV keys (and
# the DV plot further down) use ".medsamp" -- confirm this is intentional.
dv.example <- readRDS("dv_perspective_example.RDS")
dv.sample.prop <- dv.example[[
  "cc_ex_tox.likes.race_disclosed.largesampsample.prop",
  exact = FALSE
]]
dv.sample.count <- dv.example[[
  "cc_ex_tox.likes.race_disclosed.largesampsample.count",
  exact = FALSE
]]
|
|
|
|
|
|
#' Plot coefficient estimates with confidence intervals for one example.
#'
#' Looks up, under keys prefixed by `name`, the coefficient and uncertainty
#' vectors of three GLMs ("pred", "coder", "sample") and of the measurement
#' error model, and draws the requested models as point ranges with one facet
#' per coefficient. The coefficient called "Intercept" is not drawn.
#'
#' @param datalist Named list of stored results.
#' @param name Key prefix; entries are read from `<name>coef_*_model`,
#'   `<name>se_*_model`, `<name>measerr_model_par` and
#'   `<name>measerr_model_stderr`.
#' @param varnames Optional character vector that replaces the coefficient
#'   names, one entry per coefficient in stored order.
#' @param varorder Optional character vector giving the facet order.
#' @param include.models Models to draw. Either internal names
#'   ("Automatic Classification", "All Annotations", "Annotation Sample",
#'   "Error Correction") or the labels shown on the plot
#'   ("Automated Classifications", "Manual Annotations").
#' @return A ggplot object.
plot.cc.example <- function(datalist, name, varnames = NULL, varorder = NULL,
                            include.models = c("Automatic Classification",
                                               "All Annotations")) {
  model.names <- c("Automatic Classification", "All Annotations",
                   "Annotation Sample", "Error Correction")
  # Labels shown on the plot; a model without an entry keeps its own name.
  display.labels <- c("Automatic Classification" = "Automated Classifications",
                      "All Annotations" = "Manual Annotations")

  glm.suffix <- paste0(c("pred", "coder", "sample"), "_model")

  # The first GLM fixes how many leading entries of the measurement error
  # model are used.
  first.key <- paste0(name, "coef_", glm.suffix[1])
  n.pars <- length(datalist[[first.key]])
  if (n.pars == 0L) {
    stop("no coefficients stored under '", first.key, "'", call. = FALSE)
  }

  # Stack one stored quantity for all models into a long table with columns
  # `variable`, `Model` and `value.name`. Entries that are missing from
  # `datalist` are NULL and are silently dropped by data.frame().
  stack.models <- function(glm.keys, measerr.key, value.name) {
    values <- append(datalist[glm.keys],
                     list(datalist[[measerr.key]][seq_len(n.pars)]))
    names(values) <- model.names
    # data.frame() turns the vector names into row names (kept as column
    # `rn`) and rewrites the spaces in the model names to dots.
    wide <- as.data.table(data.frame(values), keep.rownames = TRUE)
    if (!is.null(varnames)) {
      wide[, rn := varnames]
    }
    setnames(wide, old = "rn", new = "variable")
    melt(wide, id.vars = "variable", variable.name = "Model",
         value.name = value.name)
  }

  df.pars <- stack.models(paste0(name, "coef_", glm.suffix),
                          paste0(name, "measerr_model_par"),
                          "Estimate")
  df.stderr <- stack.models(paste0(name, "se_", glm.suffix),
                            paste0(name, "measerr_model_stderr"),
                            "StdErr")
  df <- df.pars[df.stderr, on = c("variable", "Model")]

  # NOTE(review): sqrt() is applied to the stored "se"/"stderr" values, which
  # is only right if they are variances -- confirm against the code that
  # writes them.
  df[, `:=`(UpperCI = Estimate + 1.96 * sqrt(StdErr),
            LowerCI = Estimate - 1.96 * sqrt(StdErr))]

  if (!is.null(varorder)) {
    df[, variable := factor(variable, levels = varorder)]
  }

  # Undo the dots that data.frame() put into the model names.
  df[, Model := factor(gsub("\\.", " ", Model), levels = rev(model.names))]

  # Callers may name models by internal name or by display label.
  label.idx <- match(include.models, display.labels)
  wanted <- ifelse(is.na(label.idx), include.models,
                   names(display.labels)[label.idx])
  df <- df[Model %in% wanted]

  # Relabel for display, keeping the reversed order so that the first model
  # ends up on top after coord_flip().
  shown <- ifelse(model.names %in% names(display.labels),
                  display.labels[model.names], model.names)
  df[, Model := factor(shown[match(as.character(Model), model.names)],
                       levels = rev(shown))]

  p <- ggplot(df[variable != "Intercept"],
              aes(y = Estimate, x = Model, ymin = LowerCI, ymax = UpperCI,
                  group = variable)) +
    geom_pointrange(shape = 1) +
    facet_wrap("variable", scales = "free_x", nrow = 1, as.table = FALSE) +
    geom_hline(aes(yintercept = 0), linetype = "dashed", color = "gray40") +
    coord_flip() +
    xlab("") +
    scale_y_continuous(breaks = breaks_extended(4))
  p
}
|
|
|
|
|
|
#' Coefficient plot for the example in which toxicity is the outcome.
#'
#' @param include.models Models to draw, given either by the labels shown on
#'   the plot ("Automated Classifications", "Manual Annotations") or by the
#'   model names plot.cc.example() uses internally.
#' @return A ggplot object.
plot.civilcomments.dv.example <- function(include.models = c("Automated Classifications",
                                                             "Manual Annotations")) {
  # Hand plot.cc.example() its internal model names; the display labels
  # accepted here are translated first so the default selects both models.
  internal.names <- c("Automated Classifications" = "Automatic Classification",
                      "Manual Annotations" = "All Annotations")
  is.label <- include.models %in% names(internal.names)
  include.models[is.label] <- unname(internal.names[include.models[is.label]])

  coef.labels <- c("Intercept", "Likes", "Identity Disclosure",
                   "Likes:Identity Disclosure")

  # NOTE(review): the sample summaries at the top of this file read
  # dv.example with a ".largesamp" key prefix, this plot uses ".medsamp" --
  # confirm which one is intended.
  plot.cc.example(dv.example, "cc_ex_tox.likes.race_disclosed.medsamp",
                  varnames = coef.labels, varorder = coef.labels,
                  include.models = include.models) +
    ylab("Coefficients and 95% Confidence Intervals") +
    ggtitle("Logistic Regression Predicting Toxicity")
}
|
|
|
|
|
|
#' Coefficient plot for the example in which racial/ethnic identity
#' disclosure is the outcome.
#'
#' @param include.models Passed through to plot.cc.example().
#' @return A ggplot object.
plot.civilcomments.iv.example <- function(include.models = c("Automatic Classification",
                                                             "All Annotations")) {
  # Names applied to the stored coefficients, in stored order.
  stored.labels <- c("Intercept", "Likes", "Likes:Toxicity", "Toxicity")
  # Order in which the facets are laid out.
  facet.order <- c("Intercept", "Likes", "Toxicity", "Likes:Toxicity")

  base.plot <- plot.cc.example(
    iv.example,
    "cc_ex_tox.likes.race_disclosed.medsamp",
    varnames = stored.labels,
    varorder = facet.order,
    include.models = include.models
  )

  base.plot +
    ylab("Coefficients and 95% Confidence Intervals") +
    ggtitle("Logistic Regression Predicting Racial/Ethnic Identity Disclosure")
}
|
|
|
|
|
|
#' Coefficient plot for the IV example showing all four estimators.
#'
#' Draws the automatic-classification, all-annotations, annotation-sample and
#' error-correction estimates stored in `iv.example` by delegating to
#' plot.cc.example(), which assembles the estimates and their confidence
#' intervals from the stored entries itself. Nothing is attach()ed, so the
#' search path is left untouched.
#'
#' @return A ggplot object.
plot.civilcomments.iv.example.2 <- function() {
  # NOTE(review): this key prefix has no ".medsamp" suffix, unlike the other
  # lookups in this file -- confirm that iv.example still stores entries
  # under it.
  plot.cc.example(
    iv.example,
    "cc_ex_tox.likes.race_disclosed",
    include.models = c("Automatic Classification", "All Annotations",
                       "Annotation Sample", "Error Correction")
  )
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|