managing glmer

2024-05-09 17:05:21 -05:00 · 2024-05-09 17:05:21 -05:00 · 9dc810bedf
commit 9dc810bedf
parent f59ce460e2
2 changed files with 412 additions and 391 deletions
--- a/R/.Rhistory
+++ b/R/.Rhistory
@ -1,190 +1,8 @@
 # a) the basic things, in a table:
 #   Condition           Sample Size       mean    standard deviation  standard error
 # Immediately after       2               48.705      1.534422          1.085
 # One day after           2               41.955      2.128391          1.505
 # Three days after        2               21.795      0.7707464         0.545
 # Five days after         2               12.415      1.081873          0.765
 # Seven days after        2               8.32        0.2687006         0.19
 # b) do a one way anova based on the data, like the last homework
 grp <- c(1,1,2,2,3,3,4,4,5,5)
 results <- aov(resp~factor(grp))
 anova(results)
 # c) summarize the data and the means w a plot, boxplot
 means <- c(48.705, 41.955, 21.795, 12.415, 8.32)
 # c) summarize the data and the means w a plot, boxplot
 boxplot(results)
 # c) summarize the data and the means w a plot, boxplot
 boxplot(resp)
 # c) summarize the data and the means w a plot, boxplot
 boxplot(resp)
 # c) summarize the data and the means w a plot, boxplot
 boxplot(resp~grp)
 # Vitamin A and vitamin E measurements (ten values each). Both one-way ANOVAs
 # below group the values by `grp`, which is defined earlier in the session.
 ALevels <- c(3.36, 3.34, 3.28, 3.20, 3.26, 3.16, 3.25, 3.36, 3.01, 2.92)
 ELevels <- c(94.6, 96.0, 95.7, 93.2, 97.4, 94.3, 95.0, 97.7, 92.3, 95.1)
 # Object names are case-sensitive: the response must be spelled `ALevels`.
 Aresults <- aov(ALevels~factor(grp))
 Eresults <- aov(ELevels~factor(grp))
 # Vitamin A Anova:
 anova(Aresults)
 # Vitamin E Anova:
 anova(Eresults)
 # 12.10
 # four groups, how do nematodes impact plant growth
 # a)
 # Observations for the four nematode groups: 0, 1,000, 5,000 and 10,000
 # (the means and standard deviations printed here are the ones tabulated below).
 zero_nema <- c(10.8, 9.1, 13.5, 9.2)
 thousand_nema <-c(11.1, 11.1, 8.2, 11.3)
 fthousand_nema <- c(5.4, 4.6, 7.4, 5.0)
 tthousand_nema <- c(5.8, 5.3, 3.2, 7.5)
 # Mean and standard deviation per group; every call uses the `_nema` spelling
 # so no statistic depends on a stray, misspelled copy of the data.
 mean(zero_nema)
 sd(zero_nema)
 mean(thousand_nema)
 sd(thousand_nema)
 mean(fthousand_nema)
 sd(fthousand_nema)
 mean(tthousand_nema)
 sd(tthousand_nema)
 # Table
 # Nematodes       Means       StdDev
 #   0             10.65         2.053452
 # 1,000           10.425        1.486327
 # 5,000           5.6           1.243651
 # 10,000          5.45          1.771064
 nema_means <- c(10.65, 10.425, 5.6, 5.45)
 barplot(nema_means)
 # c)
 groupings <- c(1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4)
 resp <- c(zero_nema, thousand_nema, fthousand_nema, tthousand_nema)
 results <- aov(resp~factor(groupings))
 anova(results)
 # 12.5
 # do piano lessons improve spatial-temporal reasoning
 # Observations for each lesson group.
 piano <- c( 2, 5, 7, -2, 2, 7, 4, 1, 0, 7, 3, 4, 3, 4, 9, 4, 5, 2, 9, 6, 0, 3, 6, -1, 3, 4, 6, 7, -2, 7, -3, 3, 4, 4)
 singing <- c(1, -1, 0, 1, -4, 0, 0, 1, 0, -1)
 computer <- c(0, 1, 1, -3, -2, 4, -1, 2, 4, 2,2, 2, -3, -3, 0, 2, 0, -1, 3, -1 )
 none <- c(5, -1, 7, 0, 4, 0, 2, 1, -6, 0, 2, -1, 0, -2)
 # For every group: sample size, mean, standard deviation and standard error.
 # The standard error is sd / sqrt(n), with n always taken from length() so it
 # cannot drift out of sync with the data.
 length(piano)
 mean(piano)
 sd(piano)
 sd(piano)/sqrt(length(piano))
 length(singing)
 mean(singing)
 sd(singing)
 sd(singing)/sqrt(length(singing))
 length(computer)
 mean(computer)
 sd(computer)
 sd(computer)/sqrt(length(computer))
 length(none)
 mean(none)
 sd(none)
 sd(none)/sqrt(length(none))
 # a) make a table given the sample size
 # Table:
 #   Lessons     Size    Mean    Standard Dev    Standard Error
 #   Piano       34      3.617647  3.055196        0.5239618
 #   Singing     10      -0.3      1.494434        0.4725816
 #   Computer    20      0.45      2.21181         0.4945758
 #   None        14      0.7857143 3.190818        0.8527819
 # b)
 # H0: The spatial-temporal reasoning test results across different lesson groups will be statistically equivalent.
 # Ha: For at least one lesson group, the results of the reasoning test will be statistically different.
 # Long-format panel: one row per observation, labelled with its lesson group.
 # The repeat counts in `times` follow the same order as the labels and as the
 # vectors concatenated into Y (piano, singing, computer, none), so every
 # observation is tagged with the group it actually came from.
 data_panel <- data.frame(
 Y=c(piano, singing, computer, none),
 Site = factor(rep(c("piano", "singing", "computer", "none"), times=c(length(piano), length(singing), length(computer), length(none))))
 )
 data_panel
 tempt <- aov(Y~Site, data=data_panel)
 anova(tempt)
 # 12.6
 TukeyHSD(tempt)
 # Summary: Looking at the TukeyHSD results, there are some interesting notes in
 # where statistically significant variance lies. If we immediately discard the
 # comparisons with large p-values, we are left with three statistically significant
 # ones. One is that students with piano lessons do better than computer lesson learners
 # by an average of 3.5 points, another is that piano outperforms no lessons by about 2.8 points
 # and lastly that singing underperforms piano by about 3.3 points. While this
 # statistical tooling is useful for proving the significance of these differences in
 # performance, we can also evaluate the group means directly with a bar plot:
 means <- c(mean(piano), mean(singing), mean(computer), mean(none))
 barplot(means)
 # (1) - Get the pilot data and clean it
 #source('~/Research/tor_wikipedia_edits/handcoded_edits/inter_coder_reliability_ns0.R')
 #source ('/data/users/mgaughan/kkex_data_110823_3')
 data1 <- read_csv('../power_data_111023_mmt.csv',show_col_types = FALSE)
 library(readr)
 library(ggplot2)
 # (1) - Get the pilot data and clean it
 #source('~/Research/tor_wikipedia_edits/handcoded_edits/inter_coder_reliability_ns0.R')
 #source ('/data/users/mgaughan/kkex_data_110823_3')
 data1 <- read_csv('../power_data_111023_mmt.csv',show_col_types = FALSE)
 data2 <- read_csv('../inst_all_packages_full_results.csv')
 # (1) - Get the pilot data and clean it
 #source('~/Research/tor_wikipedia_edits/handcoded_edits/inter_coder_reliability_ns0.R')
 #source ('/data/users/mgaughan/kkex_data_110823_3')
 data1 <- read_csv('../power_data_111023_mmt.csv',show_col_types = FALSE)
 library(readr)
 library(ggplot2)
 # (1) - Get the pilot data and clean it
 #source('~/Research/tor_wikipedia_edits/handcoded_edits/inter_coder_reliability_ns0.R')
 #source ('/data/users/mgaughan/kkex_data_110823_3')
 data1 <- read_csv('../power_data_111023_mmt.csv',show_col_types = FALSE)
 data1 <- read_csv('../expanded_data_final.csv',show_col_types = FALSE)
 # Use pilot project data to calculate power of a full study through simulation
 #
 # Parts:
 # (0) - Setup
 # (1) - Get the pilot data and clean it
 # (2) - Run the model on the pilot data and extract effects
 # (3) - Set up and run the simulation
 # ====> Set variables at the arrows <====
 #
 ##############################################################################
 rm(list=ls())
 set.seed(424242)
 library(readr)
 library(ggplot2)
 data1 <- read_csv('../expanded_data_final.csv',show_col_types = FALSE)
 set.seed(424242)
 library(readr)
 library(ggplot2)
 data1 <- read_csv('../expanded_data_final.csv',show_col_types = FALSE)
 #shows the cross-age downward slopes for all underproduction averages in the face of MMT
 g3 <- ggplot(data1, aes(x=mmt, y=underproduction_mean)) +
 geom_smooth(mapping = aes(x=mmt, y=underproduction_mean, color=new.age.factor),
 method='lm', formula= y~x) +
 xlab("MMT") +
 ylab("Underproduction Factor") +
 theme_bw()
-g3
+wo_df_ranef |>
-library(readr)
+ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) +
-library(ggplot2)
+geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) +
-data1 <- read_csv('../expanded_data_final.csv',show_col_types = FALSE)
+theme_bw()
 mean(data1$milestone_count)
 data1$mmt <- (((data1$collaborators * 2)+ data1$contributors) / (data1$contributors + data1$collaborators)) - 1
 mean(data1$mmt)
 rm(list=ls())
 set.seed(424242)
 library(readr)
 library(ggplot2)
 data1 <- read_csv('../expanded_data_final.csv',show_col_types = FALSE)
 library(readr)
 library(ggplot2)
 data1 <- read_csv('../power_data_111023_mmt.csv',show_col_types = FALSE)
 data2 <- read_csv('../inst_all_packages_full_results.csv')
 data1 <- read_csv('../kk_final_expanded_data_final.csv',show_col_types = FALSE)
 library(readr)
 library(ggplot2)
 library(tidyverse)
 data1 <- read_csv('../kk_final_expanded_data_final.csv',show_col_types = FALSE)
 # this is the file with the lmer multi-level rddAnalysis
 library(tidyverse)
 library(plyr)
@ -229,6 +47,8 @@ windowed_data$week_offset <- windowed_data$week - 27
 #separate out the cleaning d
 all_actions_data <- windowed_data[which(windowed_data$observation_type == "all"),]
 mrg_actions_data <- windowed_data[which(windowed_data$observation_type == "mrg"),]
 #find some EDA to identify which types of models might be the best for this
 hist(log(all_actions_data$count))
 all_actions_data$logged_count <- log(all_actions_data$count)
 all_actions_data$log1p_count <- log1p(all_actions_data$count)
 # 3 rdd in lmer analysis
@ -240,172 +60,254 @@ library(optimx)
 library(lattice)
 all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, REML=FALSE, control = lmerControl(
 optimizer ='optimx', optCtrl=list(method='L-BFGS-B')))
 summary(all_model)
 #identifying the quartiles of effect for D
 # Random effects of the fitted model, requested with condVar=TRUE, then
 # plotted and flattened into a data frame.
 all_model_ranef <- ranef(all_model, condVar=TRUE)
 dotplot(all_model_ranef)
 df_ranefs <- as.data.frame(all_model_ranef)
 # Keep only the rows whose term is "D". The trailing comma matters: without it
 # the logical vector would be used to index columns instead of rows.
 D_df_ranef <- df_ranefs[which(df_ranefs$term == "D"),]
-View(D_df_ranef)
+#below this groups the ranefs
 # Classify each estimate by where the interval condval +/- 1.96 * condsd lies
 # relative to zero. Vectorised with ifelse() so it can be applied to whole
 # columns (e.g. inside mutate()); a plain if/else needs a length-one condition.
 #   condval, condsd: numeric vectors of estimates and their standard deviations
 #   returns a numeric vector:
 #     0 - lower bound below zero and upper bound not above zero
 #     1 - lower bound below zero and upper bound above zero (interval spans zero)
 #     2 - lower bound not below zero
 has_zero <- function(condval, condsd){
 bounds <- condsd * 1.96
 return(ifelse(((condval - bounds) < 0),ifelse(((condval + bounds) > 0), 1, 0), 2))
 }
 df_ranefs |>
 mutate(ranef_grouping = has_zero(condval, condsd))
 # Debugging variant of has_zero(): prints the interval half-widths, then
 # classifies each estimate by where condval +/- 1.96 * condsd lies relative to
 # zero. Uses ifelse() so that vector inputs (whole columns passed from
 # mutate()) are handled element by element.
 #   returns 0 (lower bound below zero, upper bound not above zero),
 #           1 (interval spans zero) or 2 (lower bound not below zero)
 has_zero <- function(condval, condsd){
 bounds <- condsd * 1.96
 print(bounds)
 return(ifelse(((condval - bounds) < 0),ifelse(((condval + bounds) > 0), 1, 0), 2))
 }
 df_ranefs |>
 mutate(ranef_grouping = has_zero(condval, condsd))
 # Debugging variant of has_zero(): prints the lower interval bounds, then
 # classifies each estimate by where condval +/- 1.96 * condsd lies relative to
 # zero. Uses ifelse() so that vector inputs (whole columns passed from
 # mutate()) are handled element by element.
 #   returns 0 (lower bound below zero, upper bound not above zero),
 #           1 (interval spans zero) or 2 (lower bound not below zero)
 has_zero <- function(condval, condsd){
 bounds <- condsd * 1.96
 print(condval - bounds)
 return(ifelse(((condval - bounds) < 0),ifelse(((condval + bounds) > 0), 1, 0), 2))
 }
 df_ranefs |>
 mutate(ranef_grouping = has_zero(condval, condsd))
 # Label each estimate by the position of its interval condval +/- 1.96 * condsd:
 #   0 - lower bound below zero, upper bound not above zero
 #   1 - lower bound below zero, upper bound above zero
 #   2 - lower bound not below zero
 has_zero <- function(condval, condsd){
 half_width <- 1.96 * condsd
 lower <- condval - half_width
 upper <- condval + half_width
 ifelse(lower < 0, ifelse(upper > 0, 1, 0), 2)
 }
 df_ranefs |>
 mutate(ranef_grouping = has_zero(condval, condsd))
 df_ranefs |>
 mutate(ranef_grouping = has_zero(condval, condsd)) |>
 group_by(ranef_grouping) |>
 summarize(no_rows = length(ranef_grouping))
 df_ranefs |>
 mutate(ranef_grouping = has_zero(condval, condsd)) |>
 group_by(ranef_grouping) |>
 summarize(no_rows = length(as.factor(ranef_grouping)))
 df_ranefs |>
 mutate(ranef_grouping = has_zero(condval, condsd)) |>
 group_by(ranef_grouping) |>
 summarize(no_rows = length(as.factor(ranef_grouping)))
 View(df_ranefs)
 # Vectorised classifier for the interval condval +/- 1.96 * condsd.
 #   condval, condsd: numeric vectors, combined element by element
 #   returns 2 when the lower bound is not below zero; otherwise 1 when the
 #   upper bound is above zero, and 0 when it is not.
 has_zero <- function(condval, condsd){
 # half-width of the interval
 bounds <- condsd * 1.96
 # outer test: lower bound below zero? inner test: upper bound above zero?
 return(ifelse(((condval - bounds) < 0),ifelse(((condval + bounds) > 0), 1, 0), 2))
 }
 df_ranefs |>
 mutate(ranef_grouping = has_zero(condval, condsd))
 View(df_ranefs)
 df_ranefs <- df_ranefs |>
-mutate(ranef_grouping = has_zero(condval, condsd))
+mutate(ranef_grouping = has_zero(condval, condsd)) |>
-View(df_ranefs)
+mutate(rank = rank(condval))
 df_ranefs |>
 group_by(ranef_grouping) |>
 summarise(no_rows = length(ranef_grouping))
 df_ranefs |>
 group_by(ranef_grouping) |>
 summarise(no_rows = length(ranef_grouping))
 df_ranefs |>
 group_by(as.factor(ranef_grouping)) |>
 summarise(no_rows = length(ranef_grouping))
 hist(df_ranefs$ranef_grouping)
 D_df_ranef <- df_ranefs[which(df_ranefs$term == "D"),]
 hist(D_df_ranefs$ranef_grouping)
 hist(D_df_ranef$ranef_grouping)
 # One vertical range per row of D_df_ranef, positioned by rank and spanning
 # condval +/- 1.96 * condsd, coloured by ranef_grouping. theme_bw() is the
 # ggplot2 black-and-white theme; ggplot2 has no geom_bw().
 D_df_ranef |>
 ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) +
 geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) +
 theme_bw()
 #plot the ranefs
 library(ggplot2)
 D_df_ranef |>
-ggplot(aes(x=grp, y=condval))
+ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) +
-D_df_ranef |>
+geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) +
-ggplot(aes(x=grp, y=condval, col = as.factor(ranef_grouping)))
+geom_bw()
 D_df_ranef |>
 ggplot(aes(x=condsd, y=condval, col = as.factor(ranef_grouping)))
 D_df_ranef |>
 ggplot(aes(x=condval, y=condval, col = as.factor(ranef_grouping)))
 D_df_ranef |>
 ggplot(aes(x=condval, y=condval, col = as.factor(ranef_grouping))) +
 geom_point()
 D_df_ranef |>
 ggplot(aes(x=grp, y=condval, col = as.factor(ranef_grouping))) +
 geom_point()
 df_ranefs <- df_ranefs |>
 mutate(ranef_grouping = has_zero(condval, condsd))
 D_df_ranef <- df_ranefs[which(df_ranefs$term == "D"),]
 hist(D_df_ranef$ranef_grouping)
 D_df_ranef |>
 ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) +
-geom_point()
+geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) +
-df_ranefs <- df_ranefs |>
+theme_bw()
-mutate(ranef_grouping = has_zero(condval, condsd))
+#identifying the quartiles of effect for D
-D_df_ranef <- df_ranefs[which(df_ranefs$term == "D"),]
+all_model_ranef <- ranef(all_model, condVar=TRUE)
 dotplot(all_model_ranef)
 df_ranefs <- as.data.frame(all_model_ranef)
 #below this groups the ranefs
 # Groups estimates by their interval condval +/- 1.96 * condsd:
 # 2 when the lower bound is not below zero; otherwise 1 if the upper bound is
 # above zero and 0 if it is not.
 has_zero <- function(condval, condsd){
 margin <- condsd * 1.96
 lower_below_zero <- (condval - margin) < 0
 upper_code <- ifelse((condval + margin) > 0, 1, 0)
 ifelse(lower_below_zero, upper_code, 2)
 }
 df_ranefs <- df_ranefs |>
 mutate(ranef_grouping = has_zero(condval, condsd)) |>
 mutate(rank = rank(condval))
 D_df_ranef <- df_ranefs[which(df_ranefs$term == "D"),]
 D_df_ranef |>
 ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) +
-geom_point()
+geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) +
 theme_bw()
 # Recompute the rank of condval over the rows of D_df_ranef only. The data
 # frame is named D_df_ranef (no trailing "s").
 D_df_ranef <- D_df_ranef |>
 mutate(rank = rank(condval))
 D_df_ranef |>
 ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) +
-geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd)))
+geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) +
-D_df_ranef |>
+theme_bw()
-ggplot(aes(x=grp, y=condval, col = as.factor(ranef_grouping))) +
+#identifying the quartiles of effect for D
-geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd)))
+all_model_blup <- blup(all_model)
-D_df_ranef |>
+all_model_ranef <- ranef(all_model)
-ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) +
+View(all_model_ranef)
-geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd)))
+df_ranefs <- as.data.frame(all_model_ranef)
-# mrg behavior for this
+dotplot(all_model_ranef)
-mrg_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, REML=FALSE, control = lmerControl(
+#identifying the quartiles of effect for D
 all_model_coef <- coef(all_model)
 View(all_model_coef)
 D_df_ranef <- df_ranefs[which(df_ranefs$term == "D"),]
 D_df_ranef <- df_ranefs[which(df_ranefs$term == "D"),]
 View(D_df_ranef)
 all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, REML=FALSE, control = lmerControl(
 optimizer ='optimx', optCtrl=list(method='L-BFGS-B')))
 all_model_ranef <- ranef(all_model)
 df_ranefs <- as.data.frame(all_model_ranef)
 D_df_ranef <- df_ranefs[which(df_ranefs$term == "D"),]
 View(D_df_ranef)
 #identifying the quartiles of effect for D
 all_model_variances <- postVar(all_model)
 #identifying the quartiles of effect for D
 all_model_variances <- vcov(all_model, condVar=TRUE)
 View(all_model_variances)
 print(all_model_variances)
 View(all_model_variances)
 conditional_variances_random <- lapply(all_model_variances, diag)
 dotplot(conditional_variances_random)
 dotplot(conditional_variances_random,
 col = "blue",
 pch = 19,
 main = "Conditional Variances of Random Effects",
 xlab = "Conditional Variance",
 ylab = "Random Effect",
 scales = list(x = list(log = TRUE)),
 auto.key = list(space = "right"))
 #identifying the quartiles of effect for D
 all_model_variances <- vcov(all_model, full=TRUE, condVar=TRUE)
 View(all_model_variances)
 summary(all_model)
 #identifying the quartiles of effect for D
 all_model_variances <- vcov(all_model, full=TRUE, condVar=TRUE)
 View(all_model_variances)
 #identifying the quartiles of effect for D
 all_model_variances <- varCorr(all_model)
 #identifying the quartiles of effect for D
 all_model_variances <- VarCorr(all_model)
 View(all_model_variances)
 View(conditional_variances_random)
 View(all_model_variances)
 attr(VarCorr(all_model)$upstream_vcs_link, "stddevs")^2
 values <- attr(VarCorr(all_model)$upstream_vcs_link, "stddevs")^2
 #identifying the quartiles of effect for D
 all_model_variances <- vcov(all_model)
 View(all_model_variances)
 print(all_model_variances)
 all_model_ranef <- ranef(all_model)$upstream_vcs_link
 View(all_model_ranef)
 all_model_ranef <- cov(ranef(all_model))
 random_effects <- ranef(all_model)
 random_effects_variances <- lapply(random_effects$upstream_vcs_link, function(x) {
 variances <- var(x$D:I(week_offset))
 return(variances)
 })
 variances <- var(x$D)
 summary_of_all <- summary(all_model)
 #identifying the quartiles of effect for D
 variance_components <- summary_of_all$varcor
 View(variance_components)
 all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, REML=FALSE, control = lmerControl(
 optimizer ='optimx', optCtrl=list(method='L-BFGS-B')))
 #identifying the quartiles of effect for D
-mrg_model_ranef <- ranef(mrg_model, condVar=TRUE)
+varcorr_of_all <- VarCorr(all_model)
-df_mrg_ranefs <- as.data.frame(mrg_model_ranef)
+View(varcorr_of_all)
-#doing similar random effect analysis for this
+print(varcorr_of_all)
-df_mrg_ranefs <- df_mrg_ranefs |>
+all_coefficients <- coef(all_model)
-mutate(ranef_grouping = has_zero(condval, condsd)) |>
+all_standard_errors <- sqrt(diag(vcov(all_model)))
-mutate(rank = rank(condval))
+all_conf_intervals <- cbind(coefficients - 1.96 * standard_errors,
-D_df_mrg_ranefs <- df_mrg_ranefs[which(df_mrg_ranefs$term == "D"),]
+coefficients + 1.96 * standard_errors)
-D_df_mrg_ranefs  |>
+all_conf_intervals <- cbind(all_coefficients - 1.96 * all_standard_errors,
-ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) +
+all_coefficients + 1.96 * all_standard_errors)
-geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd)))
+View(all_coefficients)
-D_df_ranef |>
+View(conditional_variances_random)
-ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) +
+View(all_coefficients)
-geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd)))
+#identifying the quartiles of effect for D
 confint(all_model)
 all_coefficients <- coef(all_model)
 all_standard_errors <- sqrt(diag(vcov(all_model)))[3]
 all_standard_errors <- sqrt(diag(vcov(all_model)))
 all_standard_errors <- sqrt(diag(vcov(all_model)))[4]
 all_standard_errors <- sqrt(diag(vcov(all_model)))[5]
 all_standard_errors <- sqrt(diag(vcov(all_model)))[6]
 all_standard_errors <- sqrt(diag(vcov(all_model)))[1]
 #identifying the quartiles of effect for D
 all_model_ranef <- ranef(all_model, condVar=TRUE)
 #identifying the quartiles of effect for D
 all_model_ranef_condvar <- ranef(all_model, condVar = TRUE)
 all_model_ranef <- ranef(all_model, condVar = FALSE)
 View(all_model_ranef)
 View(all_model_ranef_condvar)
 dotplot(all_model_ranef)
 dotplot(all_model_ranef_condvar)
 View(all_model_ranef_condvar)
 all_model_ranef_condvar[["upstream_vcs_link"]][["D"]]
 View(all_model_ranef)
 all_model_ranef_condvar$upstream_vcs_link
 all_model_ranef_condvar$upstream_vcs_link$D
 conditional_variances <- diag(vcov(model)$upstream_vcs_link$D)
 conditional_variances <- diag(vcov(all_model)$upstream_vcs_link$D)
 conditional_variances <- diag(vcov(all_model))
 conditional_variances <- vcov(all_model)
 View(conditional_variances)
 #identifying the quartiles of effect for D
 all_model_ranef_condvar <- var(ranef(all_model, condVar = TRUE))
 #identifying the quartiles of effect for D
 all_model_ranef_condvar <- var(ranef(all_model, condVar = TRUE)$upstream_vcs_link$D)
 #identifying the quartiles of effect for D
 all_model_ranef_condvar <- ranef(all_model, condVar = TRUE)$upstream_vcs_link$D
 #identifying the quartiles of effect for D
 all_model_ranef_condvar <- ranef(all_model, condVar = TRUE)
 View(all_model_ranef_condvar)
 #identifying the quartiles of effect for D
 all_model_ranef_condvar <- ranef(all_model, condVar = TRUE)
 View(all_model_ranef_condvar)
 attr(all_model_ranef_condvar$upstream_vcs_link$D, "condVar")
 attr(all_model_ranef_condvar$upstream_vcs_link, "condVar")
 df_ranefs <- as.data.frame(all_model_ranef_condvar)
 View(df_ranefs)
 View(all_model_ranef_condvar)
 #all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, REML=FALSE, control = lmerControl(
 #  optimizer ='optimx', optCtrl=list(method='L-BFGS-B')))
 all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, REML=FALSE)
 #identifying the quartiles of effect for D
 all_model_ranef_condvar <- ranef(all_model, condVar = TRUE)
 attr(all_model_ranef_condvar$upstream_vcs_link, "condVar")
 #all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, REML=FALSE, control = lmerControl(
 #  optimizer ='optimx', optCtrl=list(method='L-BFGS-B')))
 all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, REML=TRUE)
 #identifying the quartiles of effect for D
 all_model_ranef_condvar <- ranef(all_model, condVar = TRUE)
 attr(all_model_ranef_condvar$upstream_vcs_link, "condVar")
 df_ranefs <- as.data.frame(all_model_ranef_condvar)
 View(df_ranefs)
 #identifying the quartiles of effect for D
 all_model_ranef_condvar <- ranef(all_model, condVar = TRUE)
 View(all_model_ranef_condvar)
 all_model_ranef <- ranef(all_model, condVar = FALSE)
 View(all_model_ranef_condvar)
 View(all_model_ranef_condvar[["upstream_vcs_link"]])
 all_model_ranef_condvar[["upstream_vcs_link"]][["D"]]
 View(all_model_ranef)
 df_rn_no_cv <- as.data.frame(all_model_ranef)
 View(df_rn_no_cv)
 View(df_ranefs)
 attr(all_model_ranef_condvar$upstream_vcs_link, "postVar")
 attr(all_model_ranef_condvar$upstream_vcs_link$D, "postVar")
 attr(all_model_ranef_condvar$upstream_vcs_link, "postVar")
 attr(all_model_ranef_condvar$upstream_vcs_link, "postVar")[[4]]
 attr(all_model_ranef_condvar$upstream_vcs_link, "postVar")[[3]]
 attr(all_model_ranef_condvar$upstream_vcs_link, "postVar")[[2]]
 attr(all_model_ranef_condvar$upstream_vcs_link, "postVar")[4]
 attr(all_model_ranef_condvar$upstream_vcs_link, "postVar")
 all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, REML=FALSE, control = lmerControl(
 optimizer ='optimx', optCtrl=list(method='L-BFGS-B')))
 isSingular(all_model)
 all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (week_offset| upstream_vcs_link), data=all_actions_data, REML=FALSE, control = lmerControl(
 optimizer ='optimx', optCtrl=list(method='L-BFGS-B')))
 all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (week_offset| upstream_vcs_link), data=all_actions_data, REML=FALSE)
 all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (I:(week_offset)| upstream_vcs_link), data=all_actions_data, REML=FALSE)
 all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (I(week_offset)| upstream_vcs_link), data=all_actions_data, REML=FALSE)
 all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D| upstream_vcs_link), data=all_actions_data, REML=FALSE)
 summary_of_all <- summary(all_model)
 summary(all_model)
 #identifying the quartiles of effect for D
 all_model_ranef_condvar <- ranef(all_model, condVar = TRUE)
 attr(all_model_ranef_condvar$upstream_vcs_link, "postVar")
 all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, REML=FALSE, control = lmerControl(
 optimizer ='optimx', optCtrl=list(method='L-BFGS-B')))
 # this is the file with the lmer multi-level rddAnalysis
 library(tidyverse)
 library(plyr)
-#get the contrib data instead
+# 0 loading the readme data in
 try(setwd(dirname(rstudioapi::getActiveDocumentContext()$path)))
-contrib_df <- read_csv("../final_data/deb_contrib_did.csv")
+readme_df <- read_csv("../final_data/deb_readme_did.csv")
-#some preprocessing and expansion
+# 1 preprocessing
 #colnames(readme_df) <- c("upstream_vcs_link", "event_date", "event_hash", "before_all_ct", "before_mrg_ct", "after_all_ct", "after_mrg_ct", "before_auth_new", "after_commit_new", "after_auth_new", "before_commit_new")
 col_order <- c("upstream_vcs_link", "age_of_project", "event_date", "event_hash", "before_all_ct", "after_all_ct", "before_mrg_ct", "after_mrg_ct", "before_auth_new", "after_auth_new", "before_commit_new",  "after_commit_new")
-contrib_df <- contrib_df[,col_order]
+readme_df <- readme_df[,col_order]
-contrib_df$ct_before_all <- str_split(gsub("[][]","", contrib_df$before_all_ct), ", ")
+readme_df$ct_before_all <- str_split(gsub("[][]","", readme_df$before_all_ct), ", ")
-contrib_df$ct_after_all <- str_split(gsub("[][]","", contrib_df$after_all_ct), ", ")
+readme_df$ct_after_all <- str_split(gsub("[][]","", readme_df$after_all_ct), ", ")
-contrib_df$ct_before_mrg <- str_split(gsub("[][]","", contrib_df$before_mrg_ct), ", ")
+readme_df$ct_before_mrg <- str_split(gsub("[][]","", readme_df$before_mrg_ct), ", ")
-contrib_df$ct_after_mrg <- str_split(gsub("[][]","", contrib_df$after_mrg_ct), ", ")
+readme_df$ct_after_mrg <- str_split(gsub("[][]","", readme_df$after_mrg_ct), ", ")
 drop <- c("before_all_ct", "before_mrg_ct", "after_all_ct", "after_mrg_ct")
-contrib_df = contrib_df[,!(names(contrib_df) %in% drop)]
+readme_df = readme_df[,!(names(readme_df) %in% drop)]
 # 2 some expansion needs to happen for each project
 expand_timeseries <- function(project_row) {
 longer <- project_row |>
@ -419,9 +321,9 @@ longer$count <- as.numeric(longer$count)
 #longer <- longer[which(longer$observation_type == "all"),]
 return(longer)
 }
-expanded_data <- expand_timeseries(contrib_df[1,])
+expanded_data <- expand_timeseries(readme_df[1,])
-for (i in 2:nrow(contrib_df)){
+for (i in 2:nrow(readme_df)){
-expanded_data <- rbind(expanded_data, expand_timeseries(contrib_df[i,]))
+expanded_data <- rbind(expanded_data, expand_timeseries(readme_df[i,]))
 }
 #filter out the windows of time that we're looking at
 window_num <- 8
@ -434,79 +336,177 @@ windowed_data$week_offset <- windowed_data$week - 27
 #separate out the cleaning d
 all_actions_data <- windowed_data[which(windowed_data$observation_type == "all"),]
 mrg_actions_data <- windowed_data[which(windowed_data$observation_type == "mrg"),]
 all_actions_data$logged_count <- log(all_actions_data$count)
 all_actions_data$log1p_count <- log1p(all_actions_data$count)
-# now for merge
+# 3 rdd in lmer analysis
-mrg_actions_data$logged_count <- log(mrg_actions_data$count)
+# rdd: https://rpubs.com/phle/r_tutorial_regression_discontinuity_design
-mrg_actions_data$log1p_count <- log1p(mrg_actions_data$count)
+# lmer: https://www.youtube.com/watch?v=LzAwEKrn2Mc
 #TKTK ---------------------
 #imports for models
 library(lme4)
 # https://www.bristol.ac.uk/cmm/learning/videos/random-intercepts.html#exvar
 library(optimx)
 library(lattice)
-#models -- TKTK need to be fixed
+all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, REML=FALSE, control = lmerControl(
 all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (week_offset| upstream_vcs_link), data=all_actions_data, REML=FALSE, control = lmerControl(
 optimizer ='optimx', optCtrl=list(method='L-BFGS-B')))
 summary(all_model)
 #identifying the quartiles of effect for D
-all_model_ranef <- ranef(all_model)
+mmcm = coef(all_model)$upstream_vcs_link[, 1]
-#d_effect_ranef_all <- all_model_ranef[all_model_ranef$term=="D",]
+vcov.vals = as.data.frame(VarCorr(all_model))
-#d_effect_ranef_all$quartile <- ntile(d_effect_ranef_all$condval, 4)
+View(vcov.vals)
-df_ranefs <- as.data.frame(all_model_ranef)
+#identifying the quartiles of effect for D
-has_zero <- function(condval, condsd){
+mmcm = coef(all_model)$upstream_vcs_link
-bounds <- condsd * 1.96
+View(mmcm)
-return(ifelse(((condval - bounds) < 0),ifelse(((condval + bounds) > 0), 1, 0), 2))
+summary(all_model)$coef[,2]
 View(mmcm)
 variance_components <- VarCorr(all_model)
 group_variance <- attr(variance_components$upstream_vcs_link, "stddev")^2
 View(mmcm)
 fixef(all())
 fixef(all_model
 summary(all_model)$coef[,2]
 fixef(all_model)
 fixed_impacts = fixef(all_model)
 dotplot(all_model_ranef_condvar)
 all_model_ranef_condvar <- ranef(all_model, condVar = TRUE)
 dotplot(all_model_ranef_condvar)
 broom.mixed::tidy(all_model, effects = "ran_vals", conf.int = TRUE)
 test <- broom.mixed::tidy(all_model, effects = "ran_vals", conf.int = TRUE)
 View(test)
 all_gmodel <- glmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, family = Gamma)
 all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, family = Gamma)
 all_gmodel <- glmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, family=poisson)
 all_gmodel <- glmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D | upstream_vcs_link), data=all_actions_data, family=poisson)
 all_gmodel <- glmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D | upstream_vcs_link), data=all_actions_data, family=binomial)
 all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D | upstream_vcs_link), data=all_actions_data, family=binomial)
 df_ranefs <- as.data.frame(all_model_ranef_condvar)
 all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D | upstream_vcs_link), data=all_actions_data, family=binomial)
 all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (1 | upstream_vcs_link), data=all_actions_data, family=poisson)
 all_model_ranef_condvar <- ranef(all_gmodel, condVar = TRUE)
 all_model_ranef_condvar <- ranef(all_model, condVar = TRUE)
 all_gmodel_ranef_condvar <- ranef(all_gmodel, condVar = TRUE)
 View(all_gmodel_ranef_condvar)
 test <- broom.mixed::tidy(all_gmodel, effects = "ran_vals", conf.int = TRUE)
 View(test)
 all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)+ scaled_project_age | upstream_vcs_link), data=all_actions_data)
 test <- broom.mixed::tidy(all_gmodel, effects = "ran_vals", conf.int = TRUE)
 View(test)
 summary(all_gmodel)
 all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, REML=TRUE, control = lmerControl(
 optimizer ='optimx', optCtrl=list(method='L-BFGS-B')))
 test <- broom.mixed::tidy(all_model, effects = "ran_vals", conf.int = TRUE)
 View(test)
 test_condvals <- broom.mixed::tidy(all_gmodel, effects = "ran_vals", conf.int = TRUE)
 View(test_condvals)
 test_glmer_ranef_D <- test_condvals [which(test_condvals $term == "D"),]
 View(test_glmer_ranef_D)
 test_glmer_ranef_D <- test_condvals [which(test_condvals $term == "D"),]
 has_zero <- function(estimate, low, high){
 return(ifelse((low < 0),ifelse((high > 0), 1, 0), 2))
 }
-df_ranefs <- df_ranefs |>
+test_glmer_ranef_D <- test_glmer_ranef_D |>
-mutate(ranef_grouping = has_zero(condval, condsd)) |>
+mutate(ranef_grouping = has_zero(estimate, conf.low, conf.high)) |>
-mutate(rank = rank(condval))
+mutate(rank = rank(estimate))
-wo_df_ranef <- df_ranefs[which(df_ranefs$term == "week_offset"),]
+test_glmer_ranef_D |>
 library(ggplot2)
 wo_df_ranef |>
 ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) +
-geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd)))
+geom_linerange(aes(ymin= conf.low, ymax= conf.high)) +
 wo_df_ranef |>
 ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) +
 geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) +
 geom_bw()
 wo_df_ranef |>
 ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) +
 geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) +
 theme_bw()
-wo_df_ranef |>
+test_glmer_ranef_D |>
-ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) +
+ggplot(aes(x=rank, y=estimate, col = as.factor(ranef_grouping))) +
-geom_pointrange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) +
+geom_linerange(aes(ymin= conf.low, ymax= conf.high)) +
 theme_bw()
-wo_df_ranef |>
+summary(all_gmodel)
-ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) +
+all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data)
-geom_crossbar(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd)), width=0.2) +
+summary(all_gmodel)
 test_condvals <- broom.mixed::tidy(all_gmodel, effects = "ran_vals", conf.int = TRUE)
 test_glmer_ranef_D <- test_condvals [which(test_condvals $term == "D"),]
 has_zero <- function(estimate, low, high){
 return(ifelse((low < 0),ifelse((high > 0), 1, 0), 2))
 }
 test_glmer_ranef_D <- test_glmer_ranef_D |>
 mutate(ranef_grouping = has_zero(estimate, conf.low, conf.high)) |>
 mutate(rank = rank(estimate))
 test_glmer_ranef_D |>
 ggplot(aes(x=rank, y=estimate, col = as.factor(ranef_grouping))) +
 geom_linerange(aes(ymin= conf.low, ymax= conf.high)) +
 theme_bw()
-wo_df_ranef |>
+View(test_glmer_ranef_D)
-ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) +
+View(test_condvals)
-geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) +
+all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data)
 summary(all_gmodel)
 test_condvals <- broom.mixed::tidy(all_gmodel, effects = "ran_vals", conf.int = TRUE)
 View(test_condvals)
 all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, family = Poisson)
 all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, family = poisson)
 summary(all_gmodel)
 all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D | upstream_vcs_link), data=all_actions_data, family = poisson)
 summary(all_gmodel)
 test_condvals <- broom.mixed::tidy(all_gmodel, effects = "ran_vals", conf.int = TRUE)
 test_glmer_ranef_D <- test_condvals [which(test_condvals $term == "D"),]
 has_zero <- function(estimate, low, high){
 return(ifelse((low < 0),ifelse((high > 0), 1, 0), 2))
 }
 test_glmer_ranef_D <- test_glmer_ranef_D |>
 mutate(ranef_grouping = has_zero(estimate, conf.low, conf.high)) |>
 mutate(rank = rank(estimate))
 test_glmer_ranef_D |>
 ggplot(aes(x=rank, y=estimate, col = as.factor(ranef_grouping))) +
 geom_linerange(aes(ymin= conf.low, ymax= conf.high)) +
 theme_bw()
-wo_df_ranef |>
+all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, family = poisson)
-ggplot(aes(x=grp, y=condval, col = as.factor(ranef_grouping))) +
+all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, nAGQ=0, family = poisson)
-geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) +
+summary(all_gmodel)
 test_condvals <- broom.mixed::tidy(all_gmodel, effects = "ran_vals", conf.int = TRUE)
 test_glmer_ranef_D <- test_condvals [which(test_condvals $term == "D"),]
 has_zero <- function(estimate, low, high){
 return(ifelse((low < 0),ifelse((high > 0), 1, 0), 2))
 }
 test_glmer_ranef_D <- test_glmer_ranef_D |>
 mutate(ranef_grouping = has_zero(estimate, conf.low, conf.high)) |>
 mutate(rank = rank(estimate))
 test_glmer_ranef_D |>
 ggplot(aes(x=rank, y=estimate, col = as.factor(ranef_grouping))) +
 geom_linerange(aes(ymin= conf.low, ymax= conf.high)) +
 theme_bw()
-wo_df_ranef <- wo_df_ranef |>
+variance(all_actions_data$log1p_count)
-arrange(condval)
+var(all_actions_data$log1p_count)
-wo_df_ranef |>
+mean (all_actions_data$log1p_count)
-ggplot(aes(x=grp, y=condval, col = as.factor(ranef_grouping))) +
+#all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, nAGQ=0, family = poisson)
-geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) +
+all_gmodel <- glmer.nb(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link),data=all_actions_data)
-theme_bw()
+#all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, nAGQ=0, family = poisson)
-View(wo_df_ranef)
+all_gmodel <- glmer.nb(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link),
-df_ranefs <- df_ranefs |>
+control=glmerControl(optimizer="bobyqa",
-mutate(ranef_grouping = has_zero(condval, condsd))
+optCtrl=list(maxfun=2e5)), data=all_actions_data)
-wo_df_ranef <- df_ranefs[which(df_ranefs$term == "week_offset"),]
+#all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, nAGQ=0, family = poisson)
-wo_df_ranef <- wo_df_ranef |>
+all_gmodel <- glmer.nb(count ~ D * I(week_offset)+ scaled_project_age + (D | upstream_vcs_link),
-mutate(rank = rank(condval))
+control=glmerControl(optimizer="bobyqa",
-library(ggplot2)
+optCtrl=list(maxfun=2e5)), data=all_actions_data)
-wo_df_ranef |>
+summary(all_gmodel)
-ggplot(aes(x=grp, y=condval, col = as.factor(ranef_grouping))) +
+test_condvals <- broom.mixed::tidy(all_gmodel, effects = "ran_vals", conf.int = TRUE)
-geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) +
+test_glmer_ranef_D <- test_condvals [which(test_condvals $term == "D"),]
-theme_bw()
+has_zero <- function(estimate, low, high){
-wo_df_ranef |>
+return(ifelse((low < 0),ifelse((high > 0), 1, 0), 2))
-ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) +
+}
-geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) +
+test_glmer_ranef_D <- test_glmer_ranef_D |>
 mutate(ranef_grouping = has_zero(estimate, conf.low, conf.high)) |>
 mutate(rank = rank(estimate))
 test_glmer_ranef_D |>
 ggplot(aes(x=rank, y=estimate, col = as.factor(ranef_grouping))) +
 geom_linerange(aes(ymin= conf.low, ymax= conf.high)) +
 theme_bw()
 #all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, nAGQ=0, family = poisson)
 #all_gmodel <- glmer.nb(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset) | upstream_vcs_link),
 #                       control=glmerControl(optimizer="bobyqa",
 #                                            optCtrl=list(maxfun=2e5)), data=all_actions_data)
 all_gmodel <- glmer.nb(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset) | upstream_vcs_link), data=all_actions_data)
 #all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, nAGQ=0, family = poisson)
 #all_gmodel <- glmer.nb(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset) | upstream_vcs_link),
 #                       control=glmerControl(optimizer="bobyqa",
 #                                            optCtrl=list(maxfun=2e5)), data=all_actions_data)
 all_gmodel <- glmer.nb(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset) | upstream_vcs_link), data=all_actions_data, verbose=TRUE)
 #all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, nAGQ=0, family = poisson)
 #all_gmodel <- glmer.nb(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset) | upstream_vcs_link),
 #                       control=glmerControl(optimizer="bobyqa",
 #                                            optCtrl=list(maxfun=2e5)), data=all_actions_data)
 all_gmodel <- glmer.nb(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)  | upstream_vcs_link), data=all_actions_data)
 #all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, nAGQ=0, family = poisson)
 #all_gmodel <- glmer.nb(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset) | upstream_vcs_link),
 #                       control=glmerControl(optimizer="bobyqa",
 #                                            optCtrl=list(maxfun=2e5)), data=all_actions_data)
 all_gmodel <- glmer.nb(count ~ D * I(week_offset)+ scaled_project_age + (D | upstream_vcs_link), data=all_actions_data)
--- a/R/readmeRDDAnalysis.R
+++ b/R/readmeRDDAnalysis.R
@ -63,18 +63,39 @@ all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D * I(
  optimizer ='optimx', optCtrl=list(method='L-BFGS-B')))
 summary_of_all <- summary(all_model)
 #identifying the quartiles of effect for D
 mmcm = coef(all_model)$upstream_vcs_link
 fixed_impacts = fixef(all_model)
 summary(all_model)$coef[,2]
 variance_components <- VarCorr(all_model)
 all_model_ranef_condvar <- ranef(all_model, condVar = TRUE)
-all_model_ranef <- ranef(all_model, condVar = FALSE)
+dotplot(all_model_ranef_condvar)
 test <- broom.mixed::tidy(all_model, effects = "ran_vals", conf.int = TRUE)
 attr(all_model_ranef_condvar$upstream_vcs_link, "postVar")
 all_coefficients <- coef(all_model)
 all_standard_errors <- sqrt(diag(vcov(all_model)))[1]
 #all_conf_intervals <- cbind(all_coefficients - 1.96 * all_standard_errors, 
 #                        all_coefficients + 1.96 * all_standard_errors)
-df_ranefs <- as.data.frame(all_model_ranef_condvar)
+var(all_actions_data$log1p_count) # 1.125429
-df_rn_no_cv <- as.data.frame(all_model_ranef)
+mean (all_actions_data$log1p_count) # 0.6426873
-D_df_ranef <- df_ranefs[which(df_ranefs$term == "D"),]
+
 #all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, nAGQ=0, family = poisson)
 #all_gmodel <- glmer.nb(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset) | upstream_vcs_link),
 #                       control=glmerControl(optimizer="bobyqa",
 #                                            optCtrl=list(maxfun=2e5)), data=all_actions_data)
 all_gmodel <- glmer.nb(count ~ D * I(week_offset)+ scaled_project_age + (D | upstream_vcs_link), data=all_actions_data)
 summary(all_gmodel)
 test_condvals <- broom.mixed::tidy(all_gmodel, effects = "ran_vals", conf.int = TRUE)
 test_glmer_ranef_D <- test_condvals [which(test_condvals $term == "D"),]
 has_zero <- function(estimate, low, high){
  return(ifelse((low < 0),ifelse((high > 0), 1, 0), 2))
 }
 test_glmer_ranef_D <- test_glmer_ranef_D |>
  mutate(ranef_grouping = has_zero(estimate, conf.low, conf.high)) |>
  mutate(rank = rank(estimate))
 test_glmer_ranef_D |> 
  ggplot(aes(x=rank, y=estimate, col = as.factor(ranef_grouping))) +
  geom_linerange(aes(ymin= conf.low, ymax= conf.high)) +
  theme_bw()
 #below this groups the ranefs
 """
 has_zero <- function(condval, condsd){