# a) the basic things, in a table: # Condition Sample Size mean standard deviation standard error # Immediately after 2 48.705 1.534422 1.085 # One day after 2 41.955 2.128391 1.505 # Three days after 2 21.795 0.7707464 0.545 # Five days after 2 12.415 1.081873 0.765 # Seven days after 2 8.32 0.2687006 0.19 # b) do a one way anova based on the data, like the last homework grp <- c(1,1,2,2,3,3,4,4,5,5) results <- aov(resp~factor(grp)) anova(results) # c) summarize the data and the means w a plot, boxplot means <- c(48.705, 41.955, 21.795, 12.415, 8.32) # c) summarize the data and the means w a plot, boxplot boxplot(results) # c) summarize the data and the means w a plot, boxplot boxplot(resp) # c) summarize the data and the means w a plot, boxplot boxplot(resp) # c) summarize the data and the means w a plot, boxplot boxplot(resp~grp) ALevels <- c(3.36, 3.34, 3.28, 3.20, 3.26, 3.16, 3.25, 3.36, 3.01, 2.92) ELevels <- c(94.6, 96.0, 95.7, 93.2, 97.4, 94.3, 95.0, 97.7, 92.3, 95.1) Aresults <- aov(Alevels~factor(grp)) ALevels <- c(3.36, 3.34, 3.28, 3.20, 3.26, 3.16, 3.25, 3.36, 3.01, 2.92) ELevels <- c(94.6, 96.0, 95.7, 93.2, 97.4, 94.3, 95.0, 97.7, 92.3, 95.1) Aresults <- aov(Alevels~factor(grp)) ALevels <- c(3.36, 3.34, 3.28, 3.20, 3.26, 3.16, 3.25, 3.36, 3.01, 2.92) ELevels <- c(94.6, 96.0, 95.7, 93.2, 97.4, 94.3, 95.0, 97.7, 92.3, 95.1) Aresults <- aov(ALevels~factor(grp)) Eresults <- aov(ELevels~factor(grp)) # Vitamin A Anova: anova(Aresults) # Vimain E Anova: anova(Eresults) # 12.10 # four groups, how do nemaotodes impact plant growth # a) zero_nema <- c(10.8, 9.1, 13.5, 9.2) thousand_name <-c(11.1, 11.1, 8.2, 11.3) thousand_nema <-c(11.1, 11.1, 8.2, 11.3) fthousand_nema <- c(5.4, 4.6, 7.4, 5.0) tthousand_nema <- c(5.8, 5.3, 3.2, 7.5) mean(zero_nema) sd(zero_nema) mean(thousand_nema) sd(thousand_name) mean(fthousand_nema) sd(fthousand_nema) mean(tthousand_nema) sd(tthousand_nema) # Table # Nematodes Means StdDev # 0 10.65 2.053452 # 1,000 10.425 1.486327 # 5,000 5.6 1.243651 # 10,000 5.45 1.771064 nema_means <- c(10.65, 10.425, 5.6, 5.45) barplot(nema_means) # c) groupings <- c(1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4) resp <- c(zero_nema, thousand_nema, fthousand_nema, tthousand_nema) results <- aov(resp~factor(groupings)) anova(results) # 12.5 # do piano lessons improve spacial temporal piano <- c( 2, 5, 7, -2, 2, 7, 4, 1, 0, 7, 3, 4, 3, 4, 9, 4, 5, 2, 9, 6, 0, 3, 6, -1, 3, 4, 6, 7, -2, 7, -3, 3, 4, 4) singing <- c(1, -1, 0, 1, -4, 0, 0, 1, 0, -1) computer <- c(0, 1, 1, -3, -2, 4, -1, 2, 4, 2,2, 2, -3, -3, 0, 2, 0, -1, 3, -1 ) none <- c(5, -1, 7, 0, 4, 0, 2, 1, -6, 0, 2, -1, 0, -2) size(piano) length(piano) mean(piano) sd(piano) sd(piano)/sqrt(lenth(piano)) sd(piano)/sqrt(length(piano)) length(singing) mean(singing) sd(singing) sd(signing)/sqrt(length(singing)) sd(singing)/sqrt(length(singing)) length(computer) mean(computer) sd(computer) sd(computer)/sqrt(length(computer)) length(none) mean(none) sd(none) sd(none)/sqrt(14) # a) make a table given the sample size # Table: # Lessons Size Mean Standard Dev Standard Error # Piano 34 3.617647 3.055196 0.5239618 # Singing 10 -0.3 1.494434 0.4725816 # Computer 20 0.45 2.21181 0.4945758 # None 14 0.7857143 3.190818 0.8527819 # b) # H0: The spatial-temporal reasoning test results across different lesson groups will be statistically equivalent. # Ha: For at least one lesson group, the results of the reasoning test will be statistically different. data_panel <- data.frame( Y=c(piano, singing, computer, none), Site = factor(rep(c("piano", "singing", "computer", "none"), times=c(length(piano), length(computer), length(singing), length(none)))) ) data_panel tempt <- aov(Y~Site, data=data_panel) anova(tempt) # 12.6 TukeyHSD(tempt) # Summary: Looking at the TukeyHSD results, there are some interesting notes in # where statistically significant variance lies. If we immediately discard the # comparisons with large p-values, we are left with three statistically significant # ones. One is that students with piano lessons do better than computer lesson learners # by an average of 3.5 points, another is that piano outperforms no lessons by about 2.8 points # and lastly that singing underperforms piano by about 3.3 points. While this # statistical tooling is useful for proving the significance of these differences in # performance, we can also evaluate means <- c(mean(piano), mean(singing), mean(computer), mean(none)) barplot(means) # (1) - Get the pilot data and clean it #source('~/Research/tor_wikipedia_edits/handcoded_edits/inter_coder_reliability_ns0.R') #source ('/data/users/mgaughan/kkex_data_110823_3') data1 <- read_csv('../power_data_111023_mmt.csv',show_col_types = FALSE) library(readr) library(ggplot2) # (1) - Get the pilot data and clean it #source('~/Research/tor_wikipedia_edits/handcoded_edits/inter_coder_reliability_ns0.R') #source ('/data/users/mgaughan/kkex_data_110823_3') data1 <- read_csv('../power_data_111023_mmt.csv',show_col_types = FALSE) data2 <- read_csv('../inst_all_packages_full_results.csv') # (1) - Get the pilot data and clean it #source('~/Research/tor_wikipedia_edits/handcoded_edits/inter_coder_reliability_ns0.R') #source ('/data/users/mgaughan/kkex_data_110823_3') data1 <- read_csv('../power_data_111023_mmt.csv',show_col_types = FALSE) library(readr) library(ggplot2) # (1) - Get the pilot data and clean it #source('~/Research/tor_wikipedia_edits/handcoded_edits/inter_coder_reliability_ns0.R') #source ('/data/users/mgaughan/kkex_data_110823_3') data1 <- read_csv('../power_data_111023_mmt.csv',show_col_types = FALSE) data1 <- read_csv('../expanded_data_final.csv',show_col_types = FALSE) # Use pilot project data to calculate power of a full study through simulation # # Parts: # (0) - Setup # (1) - Get the pilot data and clean it # (2) - Run the model on the pilot data and extract effects # (3) - Set up and run the simulation # ====> Set variables at the arrows <==== # ############################################################################## rm(list=ls()) set.seed(424242) library(readr) library(ggplot2) data1 <- read_csv('../expanded_data_final.csv',show_col_types = FALSE) set.seed(424242) library(readr) library(ggplot2) data1 <- read_csv('../expanded_data_final.csv',show_col_types = FALSE) #shows the cross-age downward slopes for all underproduction averages in the face of MMT g3 <- ggplot(data1, aes(x=mmt, y=underproduction_mean)) + geom_smooth(mapping = aes(x=mmt, y=underproduction_mean, color=new.age.factor), method='lm', formula= y~x) + xlab("MMT") + ylab("Underproduction Factor") + theme_bw() g3 library(readr) library(ggplot2) data1 <- read_csv('../expanded_data_final.csv',show_col_types = FALSE) mean(data1$milestone_count) data1$mmt <- (((data1$collaborators * 2)+ data1$contributors) / (data1$contributors + data1$collaborators)) - 1 mean(data1$mmt) rm(list=ls()) set.seed(424242) library(readr) library(ggplot2) data1 <- read_csv('../expanded_data_final.csv',show_col_types = FALSE) library(readr) library(ggplot2) data1 <- read_csv('../power_data_111023_mmt.csv',show_col_types = FALSE) data2 <- read_csv('../inst_all_packages_full_results.csv') data1 <- read_csv('../kk_final_expanded_data_final.csv',show_col_types = FALSE) library(readr) library(ggplot2) library(tidyverse) data1 <- read_csv('../kk_final_expanded_data_final.csv',show_col_types = FALSE) # this is the file with the lmer multi-level rddAnalysis library(tidyverse) library(plyr) # 0 loading the readme data in try(setwd(dirname(rstudioapi::getActiveDocumentContext()$path))) readme_df <- read_csv("../final_data/deb_readme_did.csv") # 1 preprocessing #colnames(readme_df) <- c("upstream_vcs_link", "event_date", "event_hash", "before_all_ct", "before_mrg_ct", "after_all_ct", "after_mrg_ct", "before_auth_new", "after_commit_new", "after_auth_new", "before_commit_new") col_order <- c("upstream_vcs_link", "age_of_project", "event_date", "event_hash", "before_all_ct", "after_all_ct", "before_mrg_ct", "after_mrg_ct", "before_auth_new", "after_auth_new", "before_commit_new", "after_commit_new") readme_df <- readme_df[,col_order] readme_df$ct_before_all <- str_split(gsub("[][]","", readme_df$before_all_ct), ", ") readme_df$ct_after_all <- str_split(gsub("[][]","", readme_df$after_all_ct), ", ") readme_df$ct_before_mrg <- str_split(gsub("[][]","", readme_df$before_mrg_ct), ", ") readme_df$ct_after_mrg <- str_split(gsub("[][]","", readme_df$after_mrg_ct), ", ") drop <- c("before_all_ct", "before_mrg_ct", "after_all_ct", "after_mrg_ct") readme_df = readme_df[,!(names(readme_df) %in% drop)] # 2 some expansion needs to happens for each project expand_timeseries <- function(project_row) { longer <- project_row |> pivot_longer(cols = starts_with("ct"), names_to = "window", values_to = "count") |> unnest(count) longer$observation_type <- gsub("^.*_", "", longer$window) longer <- ddply(longer, "observation_type", transform, week=seq(from=0, by=1, length.out=length(observation_type))) longer$count <- as.numeric(longer$count) #longer <- longer[which(longer$observation_type == "all"),] return(longer) } expanded_data <- expand_timeseries(readme_df[1,]) for (i in 2:nrow(readme_df)){ expanded_data <- rbind(expanded_data, expand_timeseries(readme_df[i,])) } #filter out the windows of time that we're looking at window_num <- 8 windowed_data <- expanded_data |> filter(week >= (27 - window_num) & week <= (27 + window_num)) |> mutate(D = ifelse(week > 27, 1, 0)) #scale the age numbers windowed_data$scaled_project_age <- scale(windowed_data$age_of_project) windowed_data$week_offset <- windowed_data$week - 27 #separate out the cleaning d all_actions_data <- windowed_data[which(windowed_data$observation_type == "all"),] mrg_actions_data <- windowed_data[which(windowed_data$observation_type == "mrg"),] all_actions_data$logged_count <- log(all_actions_data$count) all_actions_data$log1p_count <- log1p(all_actions_data$count) # 3 rdd in lmer analysis # rdd: https://rpubs.com/phle/r_tutorial_regression_discontinuity_design # lmer: https://www.youtube.com/watch?v=LzAwEKrn2Mc library(lme4) # https://www.bristol.ac.uk/cmm/learning/videos/random-intercepts.html#exvar library(optimx) library(lattice) all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, REML=FALSE, control = lmerControl( optimizer ='optimx', optCtrl=list(method='L-BFGS-B'))) #identifying the quartiles of effect for D all_model_ranef <- ranef(all_model, condVar=TRUE) dotplot(all_model_ranef) df_ranefs <- as.data.frame(all_model_ranef) D_df_ranef <- df_ranefs[df_ranefs$term == "D"] D_df_ranef <- df_ranefs[which(df_ranefs$term == "D"),] View(D_df_ranef) has_zero <- function(condval, condsd){ bounds <- condsd * 1.96 if ((condval - bounds) < 0){ if ((condval + bounds) > 0) { return(1) } else { return(0) } } else { return(2) } } df_ranefs |> mutate(ranef_grouping = has_zero(condval, condsd)) has_zero <- function(condval, condsd){ bounds <- condsd * 1.96 print(bounds) if ((condval - bounds) < 0){ if ((condval + bounds) > 0) { return(1) } else { return(0) } } else { return(2) } } df_ranefs |> mutate(ranef_grouping = has_zero(condval, condsd)) has_zero <- function(condval, condsd){ bounds <- condsd * 1.96 print(condval - bounds) if ((condval - bounds) < 0){ if ((condval + bounds) > 0) { return(1) } else { return(0) } } else { return(2) } } df_ranefs |> mutate(ranef_grouping = has_zero(condval, condsd)) has_zero <- function(condval, condsd){ bounds <- condsd * 1.96 return(ifelse(((condval - bounds) < 0),ifelse(((condval + bounds) > 0), 1, 0), 2)) } df_ranefs |> mutate(ranef_grouping = has_zero(condval, condsd)) df_ranefs |> mutate(ranef_grouping = has_zero(condval, condsd)) |> group_by(ranef_grouping) |> summarize(no_rows = length(ranef_grouping)) df_ranefs |> mutate(ranef_grouping = has_zero(condval, condsd)) |> group_by(ranef_grouping) |> summarize(no_rows = length(as.factor(ranef_grouping))) df_ranefs |> mutate(ranef_grouping = has_zero(condval, condsd)) |> group_by(ranef_grouping) |> summarize(no_rows = length(as.factor(ranef_grouping))) View(df_ranefs) has_zero <- function(condval, condsd){ bounds <- condsd * 1.96 return(ifelse(((condval - bounds) < 0),ifelse(((condval + bounds) > 0), 1, 0), 2)) } df_ranefs |> mutate(ranef_grouping = has_zero(condval, condsd)) View(df_ranefs) df_ranefs <- df_ranefs |> mutate(ranef_grouping = has_zero(condval, condsd)) View(df_ranefs) df_ranefs |> group_by(ranef_grouping) |> summarise(no_rows = length(ranef_grouping)) df_ranefs |> group_by(ranef_grouping) |> summarise(no_rows = length(ranef_grouping)) df_ranefs |> group_by(as.factor(ranef_grouping)) |> summarise(no_rows = length(ranef_grouping)) hist(df_ranefs$ranef_grouping) D_df_ranef <- df_ranefs[which(df_ranefs$term == "D"),] hist(D_df_ranefs$ranef_grouping) hist(D_df_ranef$ranef_grouping) #plot the ranefs library(ggplot2) D_df_ranef |> ggplot(aes(x=grp, y=condval)) D_df_ranef |> ggplot(aes(x=grp, y=condval, col = as.factor(ranef_grouping))) D_df_ranef |> ggplot(aes(x=condsd, y=condval, col = as.factor(ranef_grouping))) D_df_ranef |> ggplot(aes(x=condval, y=condval, col = as.factor(ranef_grouping))) D_df_ranef |> ggplot(aes(x=condval, y=condval, col = as.factor(ranef_grouping))) + geom_point() D_df_ranef |> ggplot(aes(x=grp, y=condval, col = as.factor(ranef_grouping))) + geom_point() df_ranefs <- df_ranefs |> mutate(ranef_grouping = has_zero(condval, condsd)) D_df_ranef <- df_ranefs[which(df_ranefs$term == "D"),] hist(D_df_ranef$ranef_grouping) D_df_ranef |> ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) + geom_point() df_ranefs <- df_ranefs |> mutate(ranef_grouping = has_zero(condval, condsd)) D_df_ranef <- df_ranefs[which(df_ranefs$term == "D"),] df_ranefs <- df_ranefs |> mutate(ranef_grouping = has_zero(condval, condsd)) |> mutate(rank = rank(condval)) D_df_ranef <- df_ranefs[which(df_ranefs$term == "D"),] D_df_ranef |> ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) + geom_point() D_df_ranef |> ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) + geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) D_df_ranef |> ggplot(aes(x=grp, y=condval, col = as.factor(ranef_grouping))) + geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) D_df_ranef |> ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) + geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) # mrg behavior for this mrg_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, REML=FALSE, control = lmerControl( optimizer ='optimx', optCtrl=list(method='L-BFGS-B'))) #identifying the quartiles of effect for D mrg_model_ranef <- ranef(mrg_model, condVar=TRUE) df_mrg_ranefs <- as.data.frame(mrg_model_ranef) #doing similar random effect analysis for this df_mrg_ranefs <- df_mrg_ranefs |> mutate(ranef_grouping = has_zero(condval, condsd)) |> mutate(rank = rank(condval)) D_df_mrg_ranefs <- df_mrg_ranefs[which(df_mrg_ranefs$term == "D"),] D_df_mrg_ranefs |> ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) + geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) D_df_ranef |> ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) + geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) library(tidyverse) library(plyr) #get the contrib data instead try(setwd(dirname(rstudioapi::getActiveDocumentContext()$path))) contrib_df <- read_csv("../final_data/deb_contrib_did.csv") #some preprocessing and expansion col_order <- c("upstream_vcs_link", "age_of_project", "event_date", "event_hash", "before_all_ct", "after_all_ct", "before_mrg_ct", "after_mrg_ct", "before_auth_new", "after_auth_new", "before_commit_new", "after_commit_new") contrib_df <- contrib_df[,col_order] contrib_df$ct_before_all <- str_split(gsub("[][]","", contrib_df$before_all_ct), ", ") contrib_df$ct_after_all <- str_split(gsub("[][]","", contrib_df$after_all_ct), ", ") contrib_df$ct_before_mrg <- str_split(gsub("[][]","", contrib_df$before_mrg_ct), ", ") contrib_df$ct_after_mrg <- str_split(gsub("[][]","", contrib_df$after_mrg_ct), ", ") drop <- c("before_all_ct", "before_mrg_ct", "after_all_ct", "after_mrg_ct") contrib_df = contrib_df[,!(names(contrib_df) %in% drop)] # 2 some expansion needs to happens for each project expand_timeseries <- function(project_row) { longer <- project_row |> pivot_longer(cols = starts_with("ct"), names_to = "window", values_to = "count") |> unnest(count) longer$observation_type <- gsub("^.*_", "", longer$window) longer <- ddply(longer, "observation_type", transform, week=seq(from=0, by=1, length.out=length(observation_type))) longer$count <- as.numeric(longer$count) #longer <- longer[which(longer$observation_type == "all"),] return(longer) } expanded_data <- expand_timeseries(contrib_df[1,]) for (i in 2:nrow(contrib_df)){ expanded_data <- rbind(expanded_data, expand_timeseries(contrib_df[i,])) } #filter out the windows of time that we're looking at window_num <- 8 windowed_data <- expanded_data |> filter(week >= (27 - window_num) & week <= (27 + window_num)) |> mutate(D = ifelse(week > 27, 1, 0)) #scale the age numbers windowed_data$scaled_project_age <- scale(windowed_data$age_of_project) windowed_data$week_offset <- windowed_data$week - 27 #separate out the cleaning d all_actions_data <- windowed_data[which(windowed_data$observation_type == "all"),] mrg_actions_data <- windowed_data[which(windowed_data$observation_type == "mrg"),] all_actions_data$logged_count <- log(all_actions_data$count) all_actions_data$log1p_count <- log1p(all_actions_data$count) # now for merge mrg_actions_data$logged_count <- log(mrg_actions_data$count) mrg_actions_data$log1p_count <- log1p(mrg_actions_data$count) #TKTK --------------------- #imports for models library(lme4) library(optimx) library(lattice) #models -- TKTK need to be fixed all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (week_offset| upstream_vcs_link), data=all_actions_data, REML=FALSE, control = lmerControl( optimizer ='optimx', optCtrl=list(method='L-BFGS-B'))) summary(all_model) #identifying the quartiles of effect for D all_model_ranef <- ranef(all_model) #d_effect_ranef_all <- all_model_ranef[all_model_ranef$term=="D",] #d_effect_ranef_all$quartile <- ntile(d_effect_ranef_all$condval, 4) df_ranefs <- as.data.frame(all_model_ranef) has_zero <- function(condval, condsd){ bounds <- condsd * 1.96 return(ifelse(((condval - bounds) < 0),ifelse(((condval + bounds) > 0), 1, 0), 2)) } df_ranefs <- df_ranefs |> mutate(ranef_grouping = has_zero(condval, condsd)) |> mutate(rank = rank(condval)) wo_df_ranef <- df_ranefs[which(df_ranefs$term == "week_offset"),] library(ggplot2) wo_df_ranef |> ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) + geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) wo_df_ranef |> ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) + geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) + geom_bw() wo_df_ranef |> ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) + geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) + theme_bw() wo_df_ranef |> ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) + geom_pointrange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) + theme_bw() wo_df_ranef |> ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) + geom_crossbar(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd)), width=0.2) + theme_bw() wo_df_ranef |> ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) + geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) + theme_bw() wo_df_ranef |> ggplot(aes(x=grp, y=condval, col = as.factor(ranef_grouping))) + geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) + theme_bw() wo_df_ranef <- wo_df_ranef |> arrange(condval) wo_df_ranef |> ggplot(aes(x=grp, y=condval, col = as.factor(ranef_grouping))) + geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) + theme_bw() View(wo_df_ranef) df_ranefs <- df_ranefs |> mutate(ranef_grouping = has_zero(condval, condsd)) wo_df_ranef <- df_ranefs[which(df_ranefs$term == "week_offset"),] wo_df_ranef <- wo_df_ranef |> mutate(rank = rank(condval)) library(ggplot2) wo_df_ranef |> ggplot(aes(x=grp, y=condval, col = as.factor(ranef_grouping))) + geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) + theme_bw() wo_df_ranef |> ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) + geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) + theme_bw()