theme_bw()

# -----------------------------------------------------------------------------
# Multi-level regression-discontinuity (RDD) analysis with lme4.
#
# This reads like an interactive console transcript (repeated View() calls,
# retries of the same step), so run it statement by statement. Relative to the
# raw transcript:
#   * every statement is on its own line again;
#   * calls that could only fail (misspelled functions, undefined objects,
#     wrong attribute names) are corrected, or dropped when the corrected retry
#     followed immediately;
#   * re-runs of a step whose inputs had not changed are kept once.
# -----------------------------------------------------------------------------

# NOTE(review): `wo_df_ranef` is not created in the visible part of this
# transcript -- presumably it comes from an earlier step; confirm.
wo_df_ranef |>
  ggplot(aes(x = rank, y = condval, col = as.factor(ranef_grouping))) +
  geom_linerange(aes(
    ymin = condval - (1.96 * condsd),
    ymax = condval + (1.96 * condsd)
  )) +
  theme_bw()

# this is the file with the lmer multi-level RDD analysis
# NOTE(review): plyr is attached after tidyverse, so verbs exported by both
# (e.g. mutate()) resolve to the plyr versions in the pipelines below.
library(tidyverse)
library(plyr)

# 0 loading the readme data in
try(setwd(dirname(rstudioapi::getActiveDocumentContext()$path)))
readme_df <- read_csv("../final_data/deb_readme_did.csv")

# 1 preprocessing
# colnames(readme_df) <- c("upstream_vcs_link", "event_date", "event_hash",
#   "before_all_ct", "before_mrg_ct", "after_all_ct", "after_mrg_ct",
#   "before_auth_new", "after_commit_new", "after_auth_new",
#   "before_commit_new")
col_order <- c(
  "upstream_vcs_link", "age_of_project", "event_date", "event_hash",
  "before_all_ct", "after_all_ct", "before_mrg_ct", "after_mrg_ct",
  "before_auth_new", "after_auth_new", "before_commit_new", "after_commit_new"
)
readme_df <- readme_df[, col_order]
# The *_ct columns hold bracketed, comma-separated lists stored as text; strip
# the brackets and split them into list columns.
readme_df$ct_before_all <- str_split(gsub("[][]", "", readme_df$before_all_ct), ", ")
readme_df$ct_after_all <- str_split(gsub("[][]", "", readme_df$after_all_ct), ", ")
readme_df$ct_before_mrg <- str_split(gsub("[][]", "", readme_df$before_mrg_ct), ", ")
readme_df$ct_after_mrg <- str_split(gsub("[][]", "", readme_df$after_mrg_ct), ", ")
drop <- c("before_all_ct", "before_mrg_ct", "after_all_ct", "after_mrg_ct")
readme_df <- readme_df[, !(names(readme_df) %in% drop)]

# 2 some expansion needs to happen for each project

# Expand one project row into one row per (observation type, week).
#
# project_row: a one-row slice of `readme_df` with the `ct_*` list columns.
# Returns a data frame with `window`, `count` (numeric), `observation_type`
# (the part of the window name after its last underscore) and `week`, which
# counts up from 0 within each observation type. Because grouping is by
# observation type only, the before- and after-window of one type share a
# single continuous week index.
expand_timeseries <- function(project_row) {
  longer <- project_row |>
    pivot_longer(cols = starts_with("ct"), names_to = "window", values_to = "count") |>
    unnest(count)
  longer$observation_type <- gsub("^.*_", "", longer$window)
  longer <- ddply(
    longer, "observation_type", transform,
    week = seq(from = 0, by = 1, length.out = length(observation_type))
  )
  longer$count <- as.numeric(longer$count)
  # longer <- longer[which(longer$observation_type == "all"),]
  return(longer)
}

# Expand every project and stack the results in one go. The previous
# `for (i in 2:nrow(readme_df))` loop grew the frame with rbind() on every
# iteration and iterated over c(2, 1) when only one project was present.
expanded_data <- do.call(
  rbind,
  lapply(seq_len(nrow(readme_df)), function(i) expand_timeseries(readme_df[i, ]))
)

# filter out the windows of time that we're looking at
# NOTE(review): week 27 is used as the cut-off throughout -- presumably the
# event week; confirm against how the *_ct windows were built.
window_num <- 8
windowed_data <- expanded_data |>
  filter(week >= (27 - window_num) & week <= (27 + window_num)) |>
  mutate(D = ifelse(week > 27, 1, 0))
# scale the age numbers
windowed_data$scaled_project_age <- scale(windowed_data$age_of_project)
windowed_data$week_offset <- windowed_data$week - 27
# separate out the observation types
all_actions_data <- windowed_data[which(windowed_data$observation_type == "all"), ]
mrg_actions_data <- windowed_data[which(windowed_data$observation_type == "mrg"), ]
# some EDA to identify which types of models might be the best for this
hist(log(all_actions_data$count))
all_actions_data$logged_count <- log(all_actions_data$count)
all_actions_data$log1p_count <- log1p(all_actions_data$count)

# 3 rdd in lmer analysis
# rdd: https://rpubs.com/phle/r_tutorial_regression_discontinuity_design
# lmer: https://www.youtube.com/watch?v=LzAwEKrn2Mc
library(lme4)
# https://www.bristol.ac.uk/cmm/learning/videos/random-intercepts.html#exvar
library(optimx)
library(lattice)
all_model <- lmer(
  log1p_count ~ D * I(week_offset) + scaled_project_age +
    (D * I(week_offset) | upstream_vcs_link),
  data = all_actions_data, REML = FALSE,
  control = lmerControl(optimizer = "optimx", optCtrl = list(method = "L-BFGS-B"))
)
summary(all_model)

# identifying the quartiles of effect for D
all_model_ranef <- ranef(all_model, condVar = TRUE)
dotplot(all_model_ranef)
df_ranefs <- as.data.frame(all_model_ranef)
D_df_ranef <- df_ranefs[which(df_ranefs$term == "D"), ]

# below this groups the ranefs
# Classify the interval condval +/- 1.96 * condsd relative to zero:
#   1 = lower bound below zero and upper bound above zero (straddles zero),
#   0 = lower bound below zero and upper bound not above zero,
#   2 = lower bound not below zero.
has_zero <- function(condval, condsd) {
  bounds <- condsd * 1.96
  return(ifelse(((condval - bounds) < 0), ifelse(((condval + bounds) > 0), 1, 0), 2))
}
df_ranefs <- df_ranefs |>
  mutate(ranef_grouping = has_zero(condval, condsd)) |>
  mutate(rank = rank(condval))
D_df_ranef <- df_ranefs[which(df_ranefs$term == "D"), ]
hist(D_df_ranef$ranef_grouping)

# plot the ranefs
# (the two earlier attempts ended in geom_bw(), which ggplot2 does not provide;
# only the corrected theme_bw() call is kept)
library(ggplot2)
D_df_ranef |>
  ggplot(aes(x = rank, y = condval, col = as.factor(ranef_grouping))) +
  geom_linerange(aes(
    ymin = condval - (1.96 * condsd),
    ymax = condval + (1.96 * condsd)
  )) +
  theme_bw()

# re-rank within the D effects only, then plot again
D_df_ranef <- D_df_ranef |>
  mutate(rank = rank(condval))
D_df_ranef |>
  ggplot(aes(x = rank, y = condval, col = as.factor(ranef_grouping))) +
  geom_linerange(aes(
    ymin = condval - (1.96 * condsd),
    ymax = condval + (1.96 * condsd)
  )) +
  theme_bw()

# identifying the quartiles of effect for D
all_model_ranef <- ranef(all_model)
View(all_model_ranef)
df_ranefs <- as.data.frame(all_model_ranef)
dotplot(all_model_ranef)
all_model_coef <- coef(all_model)
View(all_model_coef)
D_df_ranef <- df_ranefs[which(df_ranefs$term == "D"), ]
View(D_df_ranef)

# looking for the variances of the effects
all_model_variances <- vcov(all_model, condVar = TRUE)
View(all_model_variances)
print(all_model_variances)
# NOTE(review): `all_model_variances` is the matrix returned by vcov(), not a
# list of per-group matrices; confirm that lapply(..., diag) and the dotplots
# below produce what was intended.
conditional_variances_random <- lapply(all_model_variances, diag)
dotplot(conditional_variances_random)
dotplot(
  conditional_variances_random,
  col = "blue", pch = 19,
  main = "Conditional Variances of Random Effects",
  xlab = "Conditional Variance", ylab = "Random Effect",
  scales = list(x = list(log = TRUE)),
  auto.key = list(space = "right")
)
all_model_variances <- vcov(all_model, full = TRUE, condVar = TRUE)
View(all_model_variances)
summary(all_model)
all_model_variances <- VarCorr(all_model)
View(all_model_variances)
View(conditional_variances_random)
# the attribute is called "stddev"; "stddevs" does not exist and gave NULL
attr(VarCorr(all_model)$upstream_vcs_link, "stddev")^2
values <- attr(VarCorr(all_model)$upstream_vcs_link, "stddev")^2
all_model_variances <- vcov(all_model)
View(all_model_variances)
print(all_model_variances)
all_model_ranef <- ranef(all_model)$upstream_vcs_link
View(all_model_ranef)
# cov()/var() need the per-group data frame (or one of its columns), not the
# whole ranef() list
all_model_ranef <- cov(ranef(all_model)$upstream_vcs_link)
random_effects <- ranef(all_model)
# lapply() over a data frame already hands each column to the function, so the
# variance of every random-effect term is simply var() of that column
random_effects_variances <- lapply(random_effects$upstream_vcs_link, var)
variances <- var(random_effects$upstream_vcs_link$D)
summary_of_all <- summary(all_model)
variance_components <- summary_of_all$varcor
View(variance_components)

# identifying the quartiles of effect for D
varcorr_of_all <- VarCorr(all_model)
View(varcorr_of_all)
print(varcorr_of_all)
all_coefficients <- coef(all_model)
all_standard_errors <- sqrt(diag(vcov(all_model)))
# Wald intervals for the fixed effects: vcov() describes the fixed-effect
# estimates, so pair its standard errors with fixef(), not with the per-group
# coef() list.
all_conf_intervals <- cbind(
  fixef(all_model) - 1.96 * all_standard_errors,
  fixef(all_model) + 1.96 * all_standard_errors
)
View(all_coefficients)
View(conditional_variances_random)
confint(all_model)
all_standard_errors <- sqrt(diag(vcov(all_model)))[3]
all_standard_errors <- sqrt(diag(vcov(all_model)))
all_standard_errors <- sqrt(diag(vcov(all_model)))[4]
all_standard_errors <- sqrt(diag(vcov(all_model)))[5]
all_standard_errors <- sqrt(diag(vcov(all_model)))[6]
all_standard_errors <- sqrt(diag(vcov(all_model)))[1]

# conditional modes with and without their conditional variances
all_model_ranef_condvar <- ranef(all_model, condVar = TRUE)
all_model_ranef <- ranef(all_model, condVar = FALSE)
View(all_model_ranef)
View(all_model_ranef_condvar)
dotplot(all_model_ranef)
dotplot(all_model_ranef_condvar)
all_model_ranef_condvar[["upstream_vcs_link"]][["D"]]
all_model_ranef_condvar$upstream_vcs_link
conditional_variances <- diag(vcov(all_model))
conditional_variances <- vcov(all_model)
View(conditional_variances)
all_model_ranef_condvar <- var(ranef(all_model, condVar = TRUE)$upstream_vcs_link$D)
all_model_ranef_condvar <- ranef(all_model, condVar = TRUE)$upstream_vcs_link$D
all_model_ranef_condvar <- ranef(all_model, condVar = TRUE)
View(all_model_ranef_condvar)
# lme4 stores the conditional variances in the "postVar" attribute of the
# per-group data frame; "condVar" is only the argument name.
# NOTE(review): the attribute lives on the data frame, so the `$D` lookup is
# expected to return NULL.
attr(all_model_ranef_condvar$upstream_vcs_link$D, "postVar")
attr(all_model_ranef_condvar$upstream_vcs_link, "postVar")
df_ranefs <- as.data.frame(all_model_ranef_condvar)
View(df_ranefs)

# same model with the default optimizer
# all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, REML=FALSE, control = lmerControl(
#   optimizer ='optimx', optCtrl=list(method='L-BFGS-B')))
all_model <- lmer(
  log1p_count ~ D * I(week_offset) + scaled_project_age +
    (D * I(week_offset) | upstream_vcs_link),
  data = all_actions_data, REML = FALSE
)
all_model_ranef_condvar <- ranef(all_model, condVar = TRUE)
attr(all_model_ranef_condvar$upstream_vcs_link, "postVar")

# same model, default optimizer, REML fit
all_model <- lmer(
  log1p_count ~ D * I(week_offset) + scaled_project_age +
    (D * I(week_offset) | upstream_vcs_link),
  data = all_actions_data, REML = TRUE
)
all_model_ranef_condvar <- ranef(all_model, condVar = TRUE)
attr(all_model_ranef_condvar$upstream_vcs_link, "postVar")
df_ranefs <- as.data.frame(all_model_ranef_condvar)
View(df_ranefs)
View(all_model_ranef_condvar)
all_model_ranef <- ranef(all_model, condVar = FALSE)
View(all_model_ranef_condvar[["upstream_vcs_link"]])
all_model_ranef_condvar[["upstream_vcs_link"]][["D"]]
View(all_model_ranef)
df_rn_no_cv <- as.data.frame(all_model_ranef)
View(df_rn_no_cv)
View(df_ranefs)
attr(all_model_ranef_condvar$upstream_vcs_link, "postVar")
attr(all_model_ranef_condvar$upstream_vcs_link$D, "postVar")
attr(all_model_ranef_condvar$upstream_vcs_link, "postVar")[[4]]
attr(all_model_ranef_condvar$upstream_vcs_link, "postVar")[[3]]
attr(all_model_ranef_condvar$upstream_vcs_link, "postVar")[[2]]
attr(all_model_ranef_condvar$upstream_vcs_link, "postVar")[4]
attr(all_model_ranef_condvar$upstream_vcs_link, "postVar")

# trying simpler random-effects structures
all_model <- lmer(
  log1p_count ~ D * I(week_offset) + scaled_project_age +
    (D * I(week_offset) | upstream_vcs_link),
  data = all_actions_data, REML = FALSE,
  control = lmerControl(optimizer = "optimx", optCtrl = list(method = "L-BFGS-B"))
)
isSingular(all_model)
all_model <- lmer(
  log1p_count ~ D * I(week_offset) + scaled_project_age +
    (week_offset | upstream_vcs_link),
  data = all_actions_data, REML = FALSE,
  control = lmerControl(optimizer = "optimx", optCtrl = list(method = "L-BFGS-B"))
)
all_model <- lmer(
  log1p_count ~ D * I(week_offset) + scaled_project_age +
    (week_offset | upstream_vcs_link),
  data = all_actions_data, REML = FALSE
)
# (the `(I:(week_offset) | ...)` attempt is dropped: `I` is a function, not a
# variable, and the corrected I(week_offset) form follows)
all_model <- lmer(
  log1p_count ~ D * I(week_offset) + scaled_project_age +
    (I(week_offset) | upstream_vcs_link),
  data = all_actions_data, REML = FALSE
)
all_model <- lmer(
  log1p_count ~ D * I(week_offset) + scaled_project_age +
    (D | upstream_vcs_link),
  data = all_actions_data, REML = FALSE
)
summary_of_all <- summary(all_model)
summary(all_model)
all_model_ranef_condvar <- ranef(all_model, condVar = TRUE)
attr(all_model_ranef_condvar$upstream_vcs_link, "postVar")
all_model <- lmer(
  log1p_count ~ D * I(week_offset) + scaled_project_age +
    (D * I(week_offset) | upstream_vcs_link),
  data = all_actions_data, REML = FALSE,
  control = lmerControl(optimizer = "optimx", optCtrl = list(method = "L-BFGS-B"))
)

# -----------------------------------------------------------------------------
# Second pass: the script re-run from the top (without the log-count EDA).
# -----------------------------------------------------------------------------
# this is the file with the lmer multi-level RDD analysis
library(tidyverse)
library(plyr)

# 0 loading the readme data in
try(setwd(dirname(rstudioapi::getActiveDocumentContext()$path)))
readme_df <- read_csv("../final_data/deb_readme_did.csv")

# 1 preprocessing
col_order <- c(
  "upstream_vcs_link", "age_of_project", "event_date", "event_hash",
  "before_all_ct", "after_all_ct", "before_mrg_ct", "after_mrg_ct",
  "before_auth_new", "after_auth_new", "before_commit_new", "after_commit_new"
)
readme_df <- readme_df[, col_order]
readme_df$ct_before_all <- str_split(gsub("[][]", "", readme_df$before_all_ct), ", ")
readme_df$ct_after_all <- str_split(gsub("[][]", "", readme_df$after_all_ct), ", ")
readme_df$ct_before_mrg <- str_split(gsub("[][]", "", readme_df$before_mrg_ct), ", ")
readme_df$ct_after_mrg <- str_split(gsub("[][]", "", readme_df$after_mrg_ct), ", ")
drop <- c("before_all_ct", "before_mrg_ct", "after_all_ct", "after_mrg_ct")
readme_df <- readme_df[, !(names(readme_df) %in% drop)]

# 2 some expansion needs to happen for each project
# (same definition as in the first pass)
expand_timeseries <- function(project_row) {
  longer <- project_row |>
    pivot_longer(cols = starts_with("ct"), names_to = "window", values_to = "count") |>
    unnest(count)
  longer$observation_type <- gsub("^.*_", "", longer$window)
  longer <- ddply(
    longer, "observation_type", transform,
    week = seq(from = 0, by = 1, length.out = length(observation_type))
  )
  longer$count <- as.numeric(longer$count)
  # longer <- longer[which(longer$observation_type == "all"),]
  return(longer)
}
expanded_data <- do.call(
  rbind,
  lapply(seq_len(nrow(readme_df)), function(i) expand_timeseries(readme_df[i, ]))
)

# filter out the windows of time that we're looking at
window_num <- 8
windowed_data <- expanded_data |>
  filter(week >= (27 - window_num) & week <= (27 + window_num)) |>
  mutate(D = ifelse(week > 27, 1, 0))
# scale the age numbers
windowed_data$scaled_project_age <- scale(windowed_data$age_of_project)
windowed_data$week_offset <- windowed_data$week - 27
# separate out the observation types
all_actions_data <- windowed_data[which(windowed_data$observation_type == "all"), ]
mrg_actions_data <- windowed_data[which(windowed_data$observation_type == "mrg"), ]
all_actions_data$log1p_count <- log1p(all_actions_data$count)

# 3 rdd in lmer analysis
# rdd: https://rpubs.com/phle/r_tutorial_regression_discontinuity_design
# lmer: https://www.youtube.com/watch?v=LzAwEKrn2Mc
library(lme4)
# https://www.bristol.ac.uk/cmm/learning/videos/random-intercepts.html#exvar
library(optimx)
library(lattice)
all_model <- lmer(
  log1p_count ~ D * I(week_offset) + scaled_project_age +
    (D * I(week_offset) | upstream_vcs_link),
  data = all_actions_data, REML = FALSE,
  control = lmerControl(optimizer = "optimx", optCtrl = list(method = "L-BFGS-B"))
)

# identifying the quartiles of effect for D
mmcm <- coef(all_model)$upstream_vcs_link[, 1]
vcov.vals <- as.data.frame(VarCorr(all_model))
View(vcov.vals)
mmcm <- coef(all_model)$upstream_vcs_link
View(mmcm)
# second column of the summary coefficient table
summary(all_model)$coefficients[, 2]
variance_components <- VarCorr(all_model)
group_variance <- attr(variance_components$upstream_vcs_link, "stddev")^2
# (`fixef(all())` is dropped and the unterminated `fixef(all_model` is closed)
fixef(all_model)
# -----------------------------------------------------------------------------
# Fixed effects and conditional modes of the lmer fit, followed by a series of
# glmer() fits with different families / random-effects structures.
# Statements that could only fail, dead stores, and exact repeats of a fit or
# helper definition have been removed; everything else keeps its order.
# -----------------------------------------------------------------------------
summary(all_model)$coefficients[, 2]
fixef(all_model)
fixed_impacts <- fixef(all_model)
dotplot(all_model_ranef_condvar)
all_model_ranef_condvar <- ranef(all_model, condVar = TRUE)
dotplot(all_model_ranef_condvar)
broom.mixed::tidy(all_model, effects = "ran_vals", conf.int = TRUE)
test <- broom.mixed::tidy(all_model, effects = "ran_vals", conf.int = TRUE)
View(test)

# NOTE(review): several family/response pairings below look like trial and
# error (Gamma or binomial on count-like responses, poisson on log1p counts);
# confirm which of these fits were actually usable.
all_gmodel <- glmer(
  log1p_count ~ D * I(week_offset) + scaled_project_age +
    (D * I(week_offset) | upstream_vcs_link),
  data = all_actions_data, family = Gamma
)
all_gmodel <- glmer(
  count ~ D * I(week_offset) + scaled_project_age +
    (D * I(week_offset) | upstream_vcs_link),
  data = all_actions_data, family = Gamma
)
all_gmodel <- glmer(
  log1p_count ~ D * I(week_offset) + scaled_project_age +
    (D * I(week_offset) | upstream_vcs_link),
  data = all_actions_data, family = poisson
)
all_gmodel <- glmer(
  log1p_count ~ D * I(week_offset) + scaled_project_age +
    (D | upstream_vcs_link),
  data = all_actions_data, family = poisson
)
all_gmodel <- glmer(
  log1p_count ~ D * I(week_offset) + scaled_project_age +
    (D | upstream_vcs_link),
  data = all_actions_data, family = binomial
)
all_gmodel <- glmer(
  count ~ D * I(week_offset) + scaled_project_age +
    (D | upstream_vcs_link),
  data = all_actions_data, family = binomial
)
df_ranefs <- as.data.frame(all_model_ranef_condvar)
all_gmodel <- glmer(
  count ~ D * I(week_offset) + scaled_project_age +
    (1 | upstream_vcs_link),
  data = all_actions_data, family = poisson
)
# keep the lmer and glmer conditional modes in separate objects
all_model_ranef_condvar <- ranef(all_model, condVar = TRUE)
all_gmodel_ranef_condvar <- ranef(all_gmodel, condVar = TRUE)
View(all_gmodel_ranef_condvar)
test <- broom.mixed::tidy(all_gmodel, effects = "ran_vals", conf.int = TRUE)
View(test)

# NOTE(review): no `family` is given in the next call, so glmer() uses its
# gaussian default; lme4 documents that usage as a deprecated shortcut to
# lmer() -- confirm whether lmer() was intended.
all_gmodel <- glmer(
  count ~ D * I(week_offset) + scaled_project_age +
    (D * I(week_offset) + scaled_project_age | upstream_vcs_link),
  data = all_actions_data
)
test <- broom.mixed::tidy(all_gmodel, effects = "ran_vals", conf.int = TRUE)
View(test)
summary(all_gmodel)

all_model <- lmer(
  log1p_count ~ D * I(week_offset) + scaled_project_age +
    (D * I(week_offset) | upstream_vcs_link),
  data = all_actions_data, REML = TRUE,
  control = lmerControl(optimizer = "optimx", optCtrl = list(method = "L-BFGS-B"))
)
test <- broom.mixed::tidy(all_model, effects = "ran_vals", conf.int = TRUE)
View(test)

test_condvals <- broom.mixed::tidy(all_gmodel, effects = "ran_vals", conf.int = TRUE)
View(test_condvals)
test_glmer_ranef_D <- test_condvals[which(test_condvals$term == "D"), ]
View(test_glmer_ranef_D)

# Classify a confidence interval relative to zero:
#   1 = low below zero and high above zero (straddles zero),
#   0 = low below zero and high not above zero,
#   2 = low not below zero.
# `estimate` is accepted but not used.
has_zero <- function(estimate, low, high) {
  return(ifelse((low < 0), ifelse((high > 0), 1, 0), 2))
}
test_glmer_ranef_D <- test_glmer_ranef_D |>
  mutate(ranef_grouping = has_zero(estimate, conf.low, conf.high)) |>
  mutate(rank = rank(estimate))
# (an earlier attempt mapped y = condval, which is not a column of the tidy()
# output used here; only the corrected y = estimate plot is kept)
test_glmer_ranef_D |>
  ggplot(aes(x = rank, y = estimate, col = as.factor(ranef_grouping))) +
  geom_linerange(aes(ymin = conf.low, ymax = conf.high)) +
  theme_bw()
summary(all_gmodel)

all_gmodel <- glmer(
  count ~ D * I(week_offset) + scaled_project_age +
    (D * I(week_offset) | upstream_vcs_link),
  data = all_actions_data
)
summary(all_gmodel)
test_condvals <- broom.mixed::tidy(all_gmodel, effects = "ran_vals", conf.int = TRUE)
test_glmer_ranef_D <- test_condvals[which(test_condvals$term == "D"), ]
test_glmer_ranef_D <- test_glmer_ranef_D |>
  mutate(ranef_grouping = has_zero(estimate, conf.low, conf.high)) |>
  mutate(rank = rank(estimate))
test_glmer_ranef_D |>
  ggplot(aes(x = rank, y = estimate, col = as.factor(ranef_grouping))) +
  geom_linerange(aes(ymin = conf.low, ymax = conf.high)) +
  theme_bw()
# -----------------------------------------------------------------------------
# Count models for `count`: gaussian-default glmer, Poisson glmer, then
# negative-binomial glmer.nb, each followed by the same "rank the D effects and
# plot their intervals" step.
# Removed relative to the raw transcript: the `family = Poisson` call (no such
# object; the lower-case retry follows), `variance()` (not a function; the
# var() retry follows), fits that were overwritten by the very next fit without
# being inspected, and repeated identical definitions of has_zero().
# -----------------------------------------------------------------------------
View(test_glmer_ranef_D)
View(test_condvals)

# NOTE(review): no `family` is given, so glmer() uses its gaussian default;
# lme4 documents that usage as a deprecated shortcut to lmer() -- confirm.
all_gmodel <- glmer(
  count ~ D * I(week_offset) + scaled_project_age +
    (D * I(week_offset) | upstream_vcs_link),
  data = all_actions_data
)
summary(all_gmodel)
test_condvals <- broom.mixed::tidy(all_gmodel, effects = "ran_vals", conf.int = TRUE)
View(test_condvals)

# Poisson, full random-effects structure
all_gmodel <- glmer(
  count ~ D * I(week_offset) + scaled_project_age +
    (D * I(week_offset) | upstream_vcs_link),
  data = all_actions_data, family = poisson
)
summary(all_gmodel)

# Poisson, random intercept and random D only
all_gmodel <- glmer(
  count ~ D * I(week_offset) + scaled_project_age +
    (D | upstream_vcs_link),
  data = all_actions_data, family = poisson
)
summary(all_gmodel)
test_condvals <- broom.mixed::tidy(all_gmodel, effects = "ran_vals", conf.int = TRUE)
test_glmer_ranef_D <- test_condvals[which(test_condvals$term == "D"), ]

# Classify a confidence interval relative to zero:
#   1 = low below zero and high above zero (straddles zero),
#   0 = low below zero and high not above zero,
#   2 = low not below zero.
# `estimate` is accepted but not used.
has_zero <- function(estimate, low, high) {
  return(ifelse((low < 0), ifelse((high > 0), 1, 0), 2))
}
test_glmer_ranef_D <- test_glmer_ranef_D |>
  mutate(ranef_grouping = has_zero(estimate, conf.low, conf.high)) |>
  mutate(rank = rank(estimate))
test_glmer_ranef_D |>
  ggplot(aes(x = rank, y = estimate, col = as.factor(ranef_grouping))) +
  geom_linerange(aes(ymin = conf.low, ymax = conf.high)) +
  theme_bw()

# Poisson, full random-effects structure, nAGQ = 0
all_gmodel <- glmer(
  count ~ D * I(week_offset) + scaled_project_age +
    (D * I(week_offset) | upstream_vcs_link),
  data = all_actions_data, nAGQ = 0, family = poisson
)
summary(all_gmodel)
test_condvals <- broom.mixed::tidy(all_gmodel, effects = "ran_vals", conf.int = TRUE)
test_glmer_ranef_D <- test_condvals[which(test_condvals$term == "D"), ]
test_glmer_ranef_D <- test_glmer_ranef_D |>
  mutate(ranef_grouping = has_zero(estimate, conf.low, conf.high)) |>
  mutate(rank = rank(estimate))
test_glmer_ranef_D |>
  ggplot(aes(x = rank, y = estimate, col = as.factor(ranef_grouping))) +
  geom_linerange(aes(ymin = conf.low, ymax = conf.high)) +
  theme_bw()

# compare spread and centre of the transformed counts
var(all_actions_data$log1p_count)
mean(all_actions_data$log1p_count)

# negative binomial, random intercept and random D only, bobyqa optimizer
# all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, nAGQ=0, family = poisson)
all_gmodel <- glmer.nb(
  count ~ D * I(week_offset) + scaled_project_age +
    (D | upstream_vcs_link),
  control = glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5)),
  data = all_actions_data
)
summary(all_gmodel)
test_condvals <- broom.mixed::tidy(all_gmodel, effects = "ran_vals", conf.int = TRUE)
test_glmer_ranef_D <- test_condvals[which(test_condvals$term == "D"), ]
test_glmer_ranef_D <- test_glmer_ranef_D |>
  mutate(ranef_grouping = has_zero(estimate, conf.low, conf.high)) |>
  mutate(rank = rank(estimate))
test_glmer_ranef_D |>
  ggplot(aes(x = rank, y = estimate, col = as.factor(ranef_grouping))) +
  geom_linerange(aes(ymin = conf.low, ymax = conf.high)) +
  theme_bw()

# negative binomial with default control settings
# all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, nAGQ=0, family = poisson)
# all_gmodel <- glmer.nb(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset) | upstream_vcs_link),
#   control=glmerControl(optimizer="bobyqa",
#   optCtrl=list(maxfun=2e5)), data=all_actions_data)
all_gmodel <- glmer.nb(
  count ~ D * I(week_offset) + scaled_project_age +
    (D * I(week_offset) | upstream_vcs_link),
  data = all_actions_data
)
# same fit with optimizer progress printed
all_gmodel <- glmer.nb(
  count ~ D * I(week_offset) + scaled_project_age +
    (D * I(week_offset) | upstream_vcs_link),
  data = all_actions_data, verbose = TRUE
)
# reduced random-effects structure
all_gmodel <- glmer.nb(
  count ~ D * I(week_offset) + scaled_project_age +
    (D | upstream_vcs_link),
  data = all_actions_data
)