From 9dc810bedf42e4052b556808eb4a4d5cb3690426 Mon Sep 17 00:00:00 2001
From: mjgaughan <mgaughan@proton.me>
Date: Thu, 9 May 2024 17:05:21 -0500
Subject: [PATCH] managing glmer

---
 R/.Rhistory           | 770 +++++++++++++++++++++---------------------
 R/readmeRDDAnalysis.R |  33 +-
 2 files changed, 412 insertions(+), 391 deletions(-)

diff --git a/R/.Rhistory b/R/.Rhistory
index 555a76b..8b90ad9 100644
--- a/R/.Rhistory
+++ b/R/.Rhistory
@@ -1,190 +1,8 @@
-# a) the basic things, in a table:
-#   Condition           Sample Size       mean    standard deviation  standard error
-# Immediately after       2               48.705      1.534422          1.085
-# One day after           2               41.955      2.128391          1.505
-# Three days after        2               21.795      0.7707464         0.545
-# Five days after         2               12.415      1.081873          0.765
-# Seven days after        2               8.32        0.2687006         0.19
-# b) do a one way anova based on the data, like the last homework
-grp <- c(1,1,2,2,3,3,4,4,5,5)
-results <- aov(resp~factor(grp))
-anova(results)
-# c) summarize the data and the means w a plot, boxplot
-means <- c(48.705, 41.955, 21.795, 12.415, 8.32)
-# c) summarize the data and the means w a plot, boxplot
-boxplot(results)
-# c) summarize the data and the means w a plot, boxplot
-boxplot(resp)
-# c) summarize the data and the means w a plot, boxplot
-boxplot(resp)
-# c) summarize the data and the means w a plot, boxplot
-boxplot(resp~grp)
-ALevels <- c(3.36, 3.34, 3.28, 3.20, 3.26, 3.16, 3.25, 3.36, 3.01, 2.92)
-ELevels <- c(94.6, 96.0, 95.7, 93.2, 97.4, 94.3, 95.0, 97.7, 92.3, 95.1)
-Aresults <- aov(Alevels~factor(grp))
-ALevels <- c(3.36, 3.34, 3.28, 3.20, 3.26, 3.16, 3.25, 3.36, 3.01, 2.92)
-ELevels <- c(94.6, 96.0, 95.7, 93.2, 97.4, 94.3, 95.0, 97.7, 92.3, 95.1)
-Aresults <- aov(Alevels~factor(grp))
-ALevels <- c(3.36, 3.34, 3.28, 3.20, 3.26, 3.16, 3.25, 3.36, 3.01, 2.92)
-ELevels <- c(94.6, 96.0, 95.7, 93.2, 97.4, 94.3, 95.0, 97.7, 92.3, 95.1)
-Aresults <- aov(ALevels~factor(grp))
-Eresults <- aov(ELevels~factor(grp))
-# Vitamin A Anova:
-anova(Aresults)
-# Vimain E Anova:
-anova(Eresults)
-# 12.10
-# four groups, how do nemaotodes impact plant growth
-# a)
-zero_nema <- c(10.8, 9.1, 13.5, 9.2)
-thousand_name <-c(11.1, 11.1, 8.2, 11.3)
-thousand_nema <-c(11.1, 11.1, 8.2, 11.3)
-fthousand_nema <- c(5.4, 4.6, 7.4, 5.0)
-tthousand_nema <- c(5.8, 5.3, 3.2, 7.5)
-mean(zero_nema)
-sd(zero_nema)
-mean(thousand_nema)
-sd(thousand_name)
-mean(fthousand_nema)
-sd(fthousand_nema)
-mean(tthousand_nema)
-sd(tthousand_nema)
-# Table
-# Nematodes       Means       StdDev
-#   0             10.65         2.053452
-# 1,000           10.425        1.486327
-# 5,000           5.6           1.243651
-# 10,000          5.45          1.771064
-nema_means <- c(10.65, 10.425, 5.6, 5.45)
-barplot(nema_means)
-# c)
-groupings <- c(1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4)
-resp <- c(zero_nema, thousand_nema, fthousand_nema, tthousand_nema)
-results <- aov(resp~factor(groupings))
-anova(results)
-# 12.5
-# do piano lessons improve spacial temporal
-piano <- c( 2, 5, 7, -2, 2, 7, 4, 1, 0, 7, 3, 4, 3, 4, 9, 4, 5, 2, 9, 6, 0, 3, 6, -1, 3, 4, 6, 7, -2, 7, -3, 3, 4, 4)
-singing <- c(1, -1, 0, 1, -4, 0, 0, 1, 0, -1)
-computer <- c(0, 1, 1, -3, -2, 4, -1, 2, 4, 2,2, 2, -3, -3, 0, 2, 0, -1, 3, -1 )
-none <- c(5, -1, 7, 0, 4, 0, 2, 1, -6, 0, 2, -1, 0, -2)
-size(piano)
-length(piano)
-mean(piano)
-sd(piano)
-sd(piano)/sqrt(lenth(piano))
-sd(piano)/sqrt(length(piano))
-length(singing)
-mean(singing)
-sd(singing)
-sd(signing)/sqrt(length(singing))
-sd(singing)/sqrt(length(singing))
-length(computer)
-mean(computer)
-sd(computer)
-sd(computer)/sqrt(length(computer))
-length(none)
-mean(none)
-sd(none)
-sd(none)/sqrt(14)
-# a) make a table given the sample size
-# Table:
-#   Lessons     Size    Mean    Standard Dev    Standard Error
-#   Piano       34      3.617647  3.055196        0.5239618
-#   Singing     10      -0.3      1.494434        0.4725816
-#   Computer    20      0.45      2.21181         0.4945758
-#   None        14      0.7857143 3.190818        0.8527819
-# b)
-# H0: The spatial-temporal reasoning test results across different lesson groups will be statistically equivalent.
-# Ha: For at least one lesson group, the results of the reasoning test will be statistically different.
-data_panel <- data.frame(
-Y=c(piano, singing, computer, none),
-Site = factor(rep(c("piano", "singing", "computer", "none"), times=c(length(piano), length(computer), length(singing), length(none))))
-)
-data_panel
-tempt <- aov(Y~Site, data=data_panel)
-anova(tempt)
-# 12.6
-TukeyHSD(tempt)
-# Summary: Looking at the TukeyHSD results, there are some interesting notes in
-# where statistically significant variance lies. If we immediately discard the
-# comparisons with large p-values, we are left with three statistically significant
-# ones. One is that students with piano lessons do better than computer lesson learners
-# by an average of 3.5 points, another is that piano outperforms no lessons by about 2.8 points
-# and lastly that singing underperforms piano by about 3.3 points. While this
-# statistical tooling is useful for proving the significance of these differences in
-# performance, we can also evaluate
-means <- c(mean(piano), mean(singing), mean(computer), mean(none))
-barplot(means)
-# (1) - Get the pilot data and clean it
-#source('~/Research/tor_wikipedia_edits/handcoded_edits/inter_coder_reliability_ns0.R')
-#source ('/data/users/mgaughan/kkex_data_110823_3')
-data1 <- read_csv('../power_data_111023_mmt.csv',show_col_types = FALSE)
-library(readr)
-library(ggplot2)
-# (1) - Get the pilot data and clean it
-#source('~/Research/tor_wikipedia_edits/handcoded_edits/inter_coder_reliability_ns0.R')
-#source ('/data/users/mgaughan/kkex_data_110823_3')
-data1 <- read_csv('../power_data_111023_mmt.csv',show_col_types = FALSE)
-data2 <- read_csv('../inst_all_packages_full_results.csv')
-# (1) - Get the pilot data and clean it
-#source('~/Research/tor_wikipedia_edits/handcoded_edits/inter_coder_reliability_ns0.R')
-#source ('/data/users/mgaughan/kkex_data_110823_3')
-data1 <- read_csv('../power_data_111023_mmt.csv',show_col_types = FALSE)
-library(readr)
-library(ggplot2)
-# (1) - Get the pilot data and clean it
-#source('~/Research/tor_wikipedia_edits/handcoded_edits/inter_coder_reliability_ns0.R')
-#source ('/data/users/mgaughan/kkex_data_110823_3')
-data1 <- read_csv('../power_data_111023_mmt.csv',show_col_types = FALSE)
-data1 <- read_csv('../expanded_data_final.csv',show_col_types = FALSE)
-# Use pilot project data to calculate power of a full study through simulation
-#
-# Parts:
-# (0) - Setup
-# (1) - Get the pilot data and clean it
-# (2) - Run the model on the pilot data and extract effects
-# (3) - Set up and run the simulation
-# ====> Set variables at the arrows <====
-#
-##############################################################################
-rm(list=ls())
-set.seed(424242)
-library(readr)
-library(ggplot2)
-data1 <- read_csv('../expanded_data_final.csv',show_col_types = FALSE)
-set.seed(424242)
-library(readr)
-library(ggplot2)
-data1 <- read_csv('../expanded_data_final.csv',show_col_types = FALSE)
-#shows the cross-age downward slopes for all underproduction averages in the face of MMT
-g3 <- ggplot(data1, aes(x=mmt, y=underproduction_mean)) +
-geom_smooth(mapping = aes(x=mmt, y=underproduction_mean, color=new.age.factor),
-method='lm', formula= y~x) +
-xlab("MMT") +
-ylab("Underproduction Factor") +
 theme_bw()
-g3
-library(readr)
-library(ggplot2)
-data1 <- read_csv('../expanded_data_final.csv',show_col_types = FALSE)
-mean(data1$milestone_count)
-data1$mmt <- (((data1$collaborators * 2)+ data1$contributors) / (data1$contributors + data1$collaborators)) - 1
-mean(data1$mmt)
-rm(list=ls())
-set.seed(424242)
-library(readr)
-library(ggplot2)
-data1 <- read_csv('../expanded_data_final.csv',show_col_types = FALSE)
-library(readr)
-library(ggplot2)
-data1 <- read_csv('../power_data_111023_mmt.csv',show_col_types = FALSE)
-data2 <- read_csv('../inst_all_packages_full_results.csv')
-data1 <- read_csv('../kk_final_expanded_data_final.csv',show_col_types = FALSE)
-library(readr)
-library(ggplot2)
-library(tidyverse)
-data1 <- read_csv('../kk_final_expanded_data_final.csv',show_col_types = FALSE)
+wo_df_ranef |>
+ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) +
+geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) +
+theme_bw()
 # this is the file with the lmer multi-level rddAnalysis
 library(tidyverse)
 library(plyr)
@@ -229,6 +47,8 @@ windowed_data$week_offset <- windowed_data$week - 27
 #separate out the cleaning d
 all_actions_data <- windowed_data[which(windowed_data$observation_type == "all"),]
 mrg_actions_data <- windowed_data[which(windowed_data$observation_type == "mrg"),]
+#find some EDA to identify which types of models might be the best for this
+hist(log(all_actions_data$count))
 all_actions_data$logged_count <- log(all_actions_data$count)
 all_actions_data$log1p_count <- log1p(all_actions_data$count)
 # 3 rdd in lmer analysis
@@ -240,172 +60,254 @@ library(optimx)
 library(lattice)
 all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, REML=FALSE, control = lmerControl(
 optimizer ='optimx', optCtrl=list(method='L-BFGS-B')))
+summary(all_model)
 #identifying the quartiles of effect for D
 all_model_ranef <- ranef(all_model, condVar=TRUE)
 dotplot(all_model_ranef)
 df_ranefs <- as.data.frame(all_model_ranef)
-D_df_ranef <- df_ranefs[df_ranefs$term == "D"]
 D_df_ranef <- df_ranefs[which(df_ranefs$term == "D"),]
-View(D_df_ranef)
-has_zero <- function(condval, condsd){
-bounds <- condsd * 1.96
-if ((condval - bounds) < 0){
-if ((condval + bounds) > 0) {
-return(1)
-} else {
-return(0)
-}
-} else {
-return(2)
-}
-}
-df_ranefs |>
-mutate(ranef_grouping = has_zero(condval, condsd))
-has_zero <- function(condval, condsd){
-bounds <- condsd * 1.96
-print(bounds)
-if ((condval - bounds) < 0){
-if ((condval + bounds) > 0) {
-return(1)
-} else {
-return(0)
-}
-} else {
-return(2)
-}
-}
-df_ranefs |>
-mutate(ranef_grouping = has_zero(condval, condsd))
-has_zero <- function(condval, condsd){
-bounds <- condsd * 1.96
-print(condval - bounds)
-if ((condval - bounds) < 0){
-if ((condval + bounds) > 0) {
-return(1)
-} else {
-return(0)
-}
-} else {
-return(2)
-}
-}
-df_ranefs |>
-mutate(ranef_grouping = has_zero(condval, condsd))
+#below this groups the ranefs
 has_zero <- function(condval, condsd){
 bounds <- condsd * 1.96
 return(ifelse(((condval - bounds) < 0),ifelse(((condval + bounds) > 0), 1, 0), 2))
 }
-df_ranefs |>
-mutate(ranef_grouping = has_zero(condval, condsd))
-df_ranefs |>
-mutate(ranef_grouping = has_zero(condval, condsd)) |>
-group_by(ranef_grouping) |>
-summarize(no_rows = length(ranef_grouping))
-df_ranefs |>
-mutate(ranef_grouping = has_zero(condval, condsd)) |>
-group_by(ranef_grouping) |>
-summarize(no_rows = length(as.factor(ranef_grouping)))
-df_ranefs |>
-mutate(ranef_grouping = has_zero(condval, condsd)) |>
-group_by(ranef_grouping) |>
-summarize(no_rows = length(as.factor(ranef_grouping)))
-View(df_ranefs)
-has_zero <- function(condval, condsd){
-bounds <- condsd * 1.96
-return(ifelse(((condval - bounds) < 0),ifelse(((condval + bounds) > 0), 1, 0), 2))
-}
-df_ranefs |>
-mutate(ranef_grouping = has_zero(condval, condsd))
-View(df_ranefs)
 df_ranefs <- df_ranefs |>
-mutate(ranef_grouping = has_zero(condval, condsd))
-View(df_ranefs)
-df_ranefs |>
-group_by(ranef_grouping) |>
-summarise(no_rows = length(ranef_grouping))
-df_ranefs |>
-group_by(ranef_grouping) |>
-summarise(no_rows = length(ranef_grouping))
-df_ranefs |>
-group_by(as.factor(ranef_grouping)) |>
-summarise(no_rows = length(ranef_grouping))
-hist(df_ranefs$ranef_grouping)
+mutate(ranef_grouping = has_zero(condval, condsd)) |>
+mutate(rank = rank(condval))
 D_df_ranef <- df_ranefs[which(df_ranefs$term == "D"),]
-hist(D_df_ranefs$ranef_grouping)
 hist(D_df_ranef$ranef_grouping)
+D_df_ranef |>
+ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) +
+geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) +
+geom_bw()
 #plot the ranefs
 library(ggplot2)
 D_df_ranef |>
-ggplot(aes(x=grp, y=condval))
-D_df_ranef |>
-ggplot(aes(x=grp, y=condval, col = as.factor(ranef_grouping)))
-D_df_ranef |>
-ggplot(aes(x=condsd, y=condval, col = as.factor(ranef_grouping)))
-D_df_ranef |>
-ggplot(aes(x=condval, y=condval, col = as.factor(ranef_grouping)))
-D_df_ranef |>
-ggplot(aes(x=condval, y=condval, col = as.factor(ranef_grouping))) +
-geom_point()
-D_df_ranef |>
-ggplot(aes(x=grp, y=condval, col = as.factor(ranef_grouping))) +
-geom_point()
-df_ranefs <- df_ranefs |>
-mutate(ranef_grouping = has_zero(condval, condsd))
-D_df_ranef <- df_ranefs[which(df_ranefs$term == "D"),]
-hist(D_df_ranef$ranef_grouping)
+ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) +
+geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) +
+geom_bw()
 D_df_ranef |>
 ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) +
-geom_point()
-df_ranefs <- df_ranefs |>
-mutate(ranef_grouping = has_zero(condval, condsd))
-D_df_ranef <- df_ranefs[which(df_ranefs$term == "D"),]
+geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) +
+theme_bw()
+#identifying the quartiles of effect for D
+all_model_ranef <- ranef(all_model, condVar=TRUE)
+dotplot(all_model_ranef)
+df_ranefs <- as.data.frame(all_model_ranef)
+#below this groups the ranefs
+has_zero <- function(condval, condsd){
+bounds <- condsd * 1.96
+return(ifelse(((condval - bounds) < 0),ifelse(((condval + bounds) > 0), 1, 0), 2))
+}
 df_ranefs <- df_ranefs |>
 mutate(ranef_grouping = has_zero(condval, condsd)) |>
 mutate(rank = rank(condval))
 D_df_ranef <- df_ranefs[which(df_ranefs$term == "D"),]
 D_df_ranef |>
 ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) +
-geom_point()
+geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) +
+theme_bw()
+D_df_ranefs <- D_df_ranefs |>
+mutate(rank = rank(condval))
+D_df_ranef <- D_df_ranef |>
+mutate(rank = rank(condval))
 D_df_ranef |>
 ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) +
-geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd)))
-D_df_ranef |>
-ggplot(aes(x=grp, y=condval, col = as.factor(ranef_grouping))) +
-geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd)))
-D_df_ranef |>
-ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) +
-geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd)))
-# mrg behavior for this
-mrg_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, REML=FALSE, control = lmerControl(
+geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) +
+theme_bw()
+#identifying the quartiles of effect for D
+all_model_blup <- blup(all_model)
+all_model_ranef <- ranef(all_model)
+View(all_model_ranef)
+df_ranefs <- as.data.frame(all_model_ranef)
+dotplot(all_model_ranef)
+#identifying the quartiles of effect for D
+all_model_coef <- coef(all_model)
+View(all_model_coef)
+D_df_ranef <- df_ranefs[which(df_ranefs$term == "D"),]
+D_df_ranef <- df_ranefs[which(df_ranefs$term == "D"),]
+View(D_df_ranef)
+all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, REML=FALSE, control = lmerControl(
+optimizer ='optimx', optCtrl=list(method='L-BFGS-B')))
+all_model_ranef <- ranef(all_model)
+df_ranefs <- as.data.frame(all_model_ranef)
+D_df_ranef <- df_ranefs[which(df_ranefs$term == "D"),]
+View(D_df_ranef)
+#identifying the quartiles of effect for D
+all_model_variances <- postVar(all_model)
+#identifying the quartiles of effect for D
+all_model_variances <- vcov(all_model, condVar=TRUE)
+View(all_model_variances)
+print(all_model_variances)
+View(all_model_variances)
+conditional_variances_random <- lapply(all_model_variances, diag)
+dotplot(conditional_variances_random)
+dotplot(conditional_variances_random,
+col = "blue",
+pch = 19,
+main = "Conditional Variances of Random Effects",
+xlab = "Conditional Variance",
+ylab = "Random Effect",
+scales = list(x = list(log = TRUE)),
+auto.key = list(space = "right"))
+#identifying the quartiles of effect for D
+all_model_variances <- vcov(all_model, full=TRUE, condVar=TRUE)
+View(all_model_variances)
+summary(all_model)
+#identifying the quartiles of effect for D
+all_model_variances <- vcov(all_model, full=TRUE, condVar=TRUE)
+View(all_model_variances)
+#identifying the quartiles of effect for D
+all_model_variances <- varCorr(all_model)
+#identifying the quartiles of effect for D
+all_model_variances <- VarCorr(all_model)
+View(all_model_variances)
+View(conditional_variances_random)
+View(all_model_variances)
+attr(VarCorr(all_model)$upstream_vcs_link, "stddevs")^2
+values <- attr(VarCorr(all_model)$upstream_vcs_link, "stddevs")^2
+#identifying the quartiles of effect for D
+all_model_variances <- vcov(all_model)
+View(all_model_variances)
+print(all_model_variances)
+all_model_ranef <- ranef(all_model)$upstream_vcs_link
+View(all_model_ranef)
+all_model_ranef <- cov(ranef(all_model))
+random_effects <- ranef(all_model)
+random_effects_variances <- lapply(random_effects$upstream_vcs_link, function(x) {
+variances <- var(x$D:I(week_offset))
+return(variances)
+})
+variances <- var(x$D)
+summary_of_all <- summary(all_model)
+#identifying the quartiles of effect for D
+variance_components <- summary_of_all$varcor
+View(variance_components)
+all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, REML=FALSE, control = lmerControl(
 optimizer ='optimx', optCtrl=list(method='L-BFGS-B')))
 #identifying the quartiles of effect for D
-mrg_model_ranef <- ranef(mrg_model, condVar=TRUE)
-df_mrg_ranefs <- as.data.frame(mrg_model_ranef)
-#doing similar random effect analysis for this
-df_mrg_ranefs <- df_mrg_ranefs |>
-mutate(ranef_grouping = has_zero(condval, condsd)) |>
-mutate(rank = rank(condval))
-D_df_mrg_ranefs <- df_mrg_ranefs[which(df_mrg_ranefs$term == "D"),]
-D_df_mrg_ranefs  |>
-ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) +
-geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd)))
-D_df_ranef |>
-ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) +
-geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd)))
+varcorr_of_all <- VarCorr(all_model)
+View(varcorr_of_all)
+print(varcorr_of_all)
+all_coefficients <- coef(all_model)
+all_standard_errors <- sqrt(diag(vcov(all_model)))
+all_conf_intervals <- cbind(coefficients - 1.96 * standard_errors,
+coefficients + 1.96 * standard_errors)
+all_conf_intervals <- cbind(all_coefficients - 1.96 * all_standard_errors,
+all_coefficients + 1.96 * all_standard_errors)
+View(all_coefficients)
+View(conditional_variances_random)
+View(all_coefficients)
+#identifying the quartiles of effect for D
+confint(all_model)
+all_coefficients <- coef(all_model)
+all_standard_errors <- sqrt(diag(vcov(all_model)))[3]
+all_standard_errors <- sqrt(diag(vcov(all_model)))
+all_standard_errors <- sqrt(diag(vcov(all_model)))[4]
+all_standard_errors <- sqrt(diag(vcov(all_model)))[5]
+all_standard_errors <- sqrt(diag(vcov(all_model)))[6]
+all_standard_errors <- sqrt(diag(vcov(all_model)))[1]
+#identifying the quartiles of effect for D
+all_model_ranef <- ranef(all_model, condVar=TRUE)
+#identifying the quartiles of effect for D
+all_model_ranef_condvar <- ranef(all_model, condVar = TRUE)
+all_model_ranef <- ranef(all_model, condVar = FALSE)
+View(all_model_ranef)
+View(all_model_ranef_condvar)
+dotplot(all_model_ranef)
+dotplot(all_model_ranef_condvar)
+View(all_model_ranef_condvar)
+all_model_ranef_condvar[["upstream_vcs_link"]][["D"]]
+View(all_model_ranef)
+all_model_ranef_condvar$upstream_vcs_link
+all_model_ranef_condvar$upstream_vcs_link$D
+conditional_variances <- diag(vcov(model)$upstream_vcs_link$D)
+conditional_variances <- diag(vcov(all_model)$upstream_vcs_link$D)
+conditional_variances <- diag(vcov(all_model))
+conditional_variances <- vcov(all_model)
+View(conditional_variances)
+#identifying the quartiles of effect for D
+all_model_ranef_condvar <- var(ranef(all_model, condVar = TRUE))
+#identifying the quartiles of effect for D
+all_model_ranef_condvar <- var(ranef(all_model, condVar = TRUE)$upstream_vcs_link$D)
+#identifying the quartiles of effect for D
+all_model_ranef_condvar <- ranef(all_model, condVar = TRUE)$upstream_vcs_link$D
+#identifying the quartiles of effect for D
+all_model_ranef_condvar <- ranef(all_model, condVar = TRUE)
+View(all_model_ranef_condvar)
+#identifying the quartiles of effect for D
+all_model_ranef_condvar <- ranef(all_model, condVar = TRUE)
+View(all_model_ranef_condvar)
+attr(all_model_ranef_condvar$upstream_vcs_link$D, "condVar")
+attr(all_model_ranef_condvar$upstream_vcs_link, "condVar")
+df_ranefs <- as.data.frame(all_model_ranef_condvar)
+View(df_ranefs)
+View(all_model_ranef_condvar)
+#all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, REML=FALSE, control = lmerControl(
+#  optimizer ='optimx', optCtrl=list(method='L-BFGS-B')))
+all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, REML=FALSE)
+#identifying the quartiles of effect for D
+all_model_ranef_condvar <- ranef(all_model, condVar = TRUE)
+attr(all_model_ranef_condvar$upstream_vcs_link, "condVar")
+#all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, REML=FALSE, control = lmerControl(
+#  optimizer ='optimx', optCtrl=list(method='L-BFGS-B')))
+all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, REML=TRUE)
+#identifying the quartiles of effect for D
+all_model_ranef_condvar <- ranef(all_model, condVar = TRUE)
+attr(all_model_ranef_condvar$upstream_vcs_link, "condVar")
+df_ranefs <- as.data.frame(all_model_ranef_condvar)
+View(df_ranefs)
+#identifying the quartiles of effect for D
+all_model_ranef_condvar <- ranef(all_model, condVar = TRUE)
+View(all_model_ranef_condvar)
+all_model_ranef <- ranef(all_model, condVar = FALSE)
+View(all_model_ranef_condvar)
+View(all_model_ranef_condvar[["upstream_vcs_link"]])
+all_model_ranef_condvar[["upstream_vcs_link"]][["D"]]
+View(all_model_ranef)
+df_rn_no_cv <- as.data.frame(all_model_ranef)
+View(df_rn_no_cv)
+View(df_ranefs)
+attr(all_model_ranef_condvar$upstream_vcs_link, "postVar")
+attr(all_model_ranef_condvar$upstream_vcs_link$D, "postVar")
+attr(all_model_ranef_condvar$upstream_vcs_link, "postVar")
+attr(all_model_ranef_condvar$upstream_vcs_link, "postVar")[[4]]
+attr(all_model_ranef_condvar$upstream_vcs_link, "postVar")[[3]]
+attr(all_model_ranef_condvar$upstream_vcs_link, "postVar")[[2]]
+attr(all_model_ranef_condvar$upstream_vcs_link, "postVar")[4]
+attr(all_model_ranef_condvar$upstream_vcs_link, "postVar")
+all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, REML=FALSE, control = lmerControl(
+optimizer ='optimx', optCtrl=list(method='L-BFGS-B')))
+isSingular(all_model)
+all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (week_offset| upstream_vcs_link), data=all_actions_data, REML=FALSE, control = lmerControl(
+optimizer ='optimx', optCtrl=list(method='L-BFGS-B')))
+all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (week_offset| upstream_vcs_link), data=all_actions_data, REML=FALSE)
+all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (I:(week_offset)| upstream_vcs_link), data=all_actions_data, REML=FALSE)
+all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (I(week_offset)| upstream_vcs_link), data=all_actions_data, REML=FALSE)
+all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D| upstream_vcs_link), data=all_actions_data, REML=FALSE)
+summary_of_all <- summary(all_model)
+summary(all_model)
+#identifying the quartiles of effect for D
+all_model_ranef_condvar <- ranef(all_model, condVar = TRUE)
+attr(all_model_ranef_condvar$upstream_vcs_link, "postVar")
+all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, REML=FALSE, control = lmerControl(
+optimizer ='optimx', optCtrl=list(method='L-BFGS-B')))
+# this is the file with the lmer multi-level rddAnalysis
 library(tidyverse)
 library(plyr)
-#get the contrib data instead
+# 0 loading the readme data in
 try(setwd(dirname(rstudioapi::getActiveDocumentContext()$path)))
-contrib_df <- read_csv("../final_data/deb_contrib_did.csv")
-#some preprocessing and expansion
+readme_df <- read_csv("../final_data/deb_readme_did.csv")
+# 1 preprocessing
+#colnames(readme_df) <- c("upstream_vcs_link", "event_date", "event_hash", "before_all_ct", "before_mrg_ct", "after_all_ct", "after_mrg_ct", "before_auth_new", "after_commit_new", "after_auth_new", "before_commit_new")
 col_order <- c("upstream_vcs_link", "age_of_project", "event_date", "event_hash", "before_all_ct", "after_all_ct", "before_mrg_ct", "after_mrg_ct", "before_auth_new", "after_auth_new", "before_commit_new",  "after_commit_new")
-contrib_df <- contrib_df[,col_order]
-contrib_df$ct_before_all <- str_split(gsub("[][]","", contrib_df$before_all_ct), ", ")
-contrib_df$ct_after_all <- str_split(gsub("[][]","", contrib_df$after_all_ct), ", ")
-contrib_df$ct_before_mrg <- str_split(gsub("[][]","", contrib_df$before_mrg_ct), ", ")
-contrib_df$ct_after_mrg <- str_split(gsub("[][]","", contrib_df$after_mrg_ct), ", ")
+readme_df <- readme_df[,col_order]
+readme_df$ct_before_all <- str_split(gsub("[][]","", readme_df$before_all_ct), ", ")
+readme_df$ct_after_all <- str_split(gsub("[][]","", readme_df$after_all_ct), ", ")
+readme_df$ct_before_mrg <- str_split(gsub("[][]","", readme_df$before_mrg_ct), ", ")
+readme_df$ct_after_mrg <- str_split(gsub("[][]","", readme_df$after_mrg_ct), ", ")
 drop <- c("before_all_ct", "before_mrg_ct", "after_all_ct", "after_mrg_ct")
-contrib_df = contrib_df[,!(names(contrib_df) %in% drop)]
+readme_df = readme_df[,!(names(readme_df) %in% drop)]
 # 2 some expansion needs to happens for each project
 expand_timeseries <- function(project_row) {
 longer <- project_row |>
@@ -419,9 +321,9 @@ longer$count <- as.numeric(longer$count)
 #longer <- longer[which(longer$observation_type == "all"),]
 return(longer)
 }
-expanded_data <- expand_timeseries(contrib_df[1,])
-for (i in 2:nrow(contrib_df)){
-expanded_data <- rbind(expanded_data, expand_timeseries(contrib_df[i,]))
+expanded_data <- expand_timeseries(readme_df[1,])
+for (i in 2:nrow(readme_df)){
+expanded_data <- rbind(expanded_data, expand_timeseries(readme_df[i,]))
 }
 #filter out the windows of time that we're looking at
 window_num <- 8
@@ -434,79 +336,177 @@ windowed_data$week_offset <- windowed_data$week - 27
 #separate out the cleaning d
 all_actions_data <- windowed_data[which(windowed_data$observation_type == "all"),]
 mrg_actions_data <- windowed_data[which(windowed_data$observation_type == "mrg"),]
-all_actions_data$logged_count <- log(all_actions_data$count)
 all_actions_data$log1p_count <- log1p(all_actions_data$count)
-# now for merge
-mrg_actions_data$logged_count <- log(mrg_actions_data$count)
-mrg_actions_data$log1p_count <- log1p(mrg_actions_data$count)
-#TKTK ---------------------
-#imports for models
+# 3 rdd in lmer analysis
+# rdd: https://rpubs.com/phle/r_tutorial_regression_discontinuity_design
+# lmer: https://www.youtube.com/watch?v=LzAwEKrn2Mc
 library(lme4)
+# https://www.bristol.ac.uk/cmm/learning/videos/random-intercepts.html#exvar
 library(optimx)
 library(lattice)
-#models -- TKTK need to be fixed
-all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (week_offset| upstream_vcs_link), data=all_actions_data, REML=FALSE, control = lmerControl(
+all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, REML=FALSE, control = lmerControl(
 optimizer ='optimx', optCtrl=list(method='L-BFGS-B')))
-summary(all_model)
 #identifying the quartiles of effect for D
-all_model_ranef <- ranef(all_model)
-#d_effect_ranef_all <- all_model_ranef[all_model_ranef$term=="D",]
-#d_effect_ranef_all$quartile <- ntile(d_effect_ranef_all$condval, 4)
-df_ranefs <- as.data.frame(all_model_ranef)
-has_zero <- function(condval, condsd){
-bounds <- condsd * 1.96
-return(ifelse(((condval - bounds) < 0),ifelse(((condval + bounds) > 0), 1, 0), 2))
+mmcm = coef(all_model)$upstream_vcs_link[, 1]
+vcov.vals = as.data.frame(VarCorr(all_model))
+View(vcov.vals)
+#identifying the quartiles of effect for D
+mmcm = coef(all_model)$upstream_vcs_link
+View(mmcm)
+summary(all_model)$coef[,2]
+View(mmcm)
+variance_components <- VarCorr(all_model)
+group_variance <- attr(variance_components$upstream_vcs_link, "stddev")^2
+View(mmcm)
+fixef(all())
+fixef(all_model
+summary(all_model)$coef[,2]
+fixef(all_model)
+fixed_impacts = fixef(all_model)
+dotplot(all_model_ranef_condvar)
+all_model_ranef_condvar <- ranef(all_model, condVar = TRUE)
+dotplot(all_model_ranef_condvar)
+broom.mixed::tidy(all_model, effects = "ran_vals", conf.int = TRUE)
+test <- broom.mixed::tidy(all_model, effects = "ran_vals", conf.int = TRUE)
+View(test)
+all_gmodel <- glmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, family = Gamma)
+all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, family = Gamma)
+all_gmodel <- glmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, family=poisson)
+all_gmodel <- glmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D | upstream_vcs_link), data=all_actions_data, family=poisson)
+all_gmodel <- glmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D | upstream_vcs_link), data=all_actions_data, family=binomial)
+all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D | upstream_vcs_link), data=all_actions_data, family=binomial)
+df_ranefs <- as.data.frame(all_model_ranef_condvar)
+all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D | upstream_vcs_link), data=all_actions_data, family=binomial)
+all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (1 | upstream_vcs_link), data=all_actions_data, family=poisson)
+all_model_ranef_condvar <- ranef(all_gmodel, condVar = TRUE)
+all_model_ranef_condvar <- ranef(all_model, condVar = TRUE)
+all_gmodel_ranef_condvar <- ranef(all_gmodel, condVar = TRUE)
+View(all_gmodel_ranef_condvar)
+test <- broom.mixed::tidy(all_gmodel, effects = "ran_vals", conf.int = TRUE)
+View(test)
+all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)+ scaled_project_age | upstream_vcs_link), data=all_actions_data)
+test <- broom.mixed::tidy(all_gmodel, effects = "ran_vals", conf.int = TRUE)
+View(test)
+summary(all_gmodel)
+all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, REML=TRUE, control = lmerControl(
+optimizer ='optimx', optCtrl=list(method='L-BFGS-B')))
+test <- broom.mixed::tidy(all_model, effects = "ran_vals", conf.int = TRUE)
+View(test)
+test_condvals <- broom.mixed::tidy(all_gmodel, effects = "ran_vals", conf.int = TRUE)
+View(test_condvals)
+test_glmer_ranef_D <- test_condvals [which(test_condvals $term == "D"),]
+View(test_glmer_ranef_D)
+test_glmer_ranef_D <- test_condvals [which(test_condvals $term == "D"),]
+has_zero <- function(estimate, low, high){
+return(ifelse((low < 0),ifelse((high > 0), 1, 0), 2))
 }
-df_ranefs <- df_ranefs |>
-mutate(ranef_grouping = has_zero(condval, condsd)) |>
-mutate(rank = rank(condval))
-wo_df_ranef <- df_ranefs[which(df_ranefs$term == "week_offset"),]
-library(ggplot2)
-wo_df_ranef |>
+test_glmer_ranef_D <- test_glmer_ranef_D |>
+mutate(ranef_grouping = has_zero(estimate, conf.low, conf.high)) |>
+mutate(rank = rank(estimate))
+test_glmer_ranef_D |>
 ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) +
-geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd)))
-wo_df_ranef |>
-ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) +
-geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) +
-geom_bw()
-wo_df_ranef |>
-ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) +
-geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) +
+geom_linerange(aes(ymin= conf.low, ymax= conf.high)) +
 theme_bw()
-wo_df_ranef |>
-ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) +
-geom_pointrange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) +
+test_glmer_ranef_D |>
+ggplot(aes(x=rank, y=estimate, col = as.factor(ranef_grouping))) +
+geom_linerange(aes(ymin= conf.low, ymax= conf.high)) +
 theme_bw()
-wo_df_ranef |>
-ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) +
-geom_crossbar(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd)), width=0.2) +
+summary(all_gmodel)
+all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data)
+summary(all_gmodel)
+test_condvals <- broom.mixed::tidy(all_gmodel, effects = "ran_vals", conf.int = TRUE)
+test_glmer_ranef_D <- test_condvals [which(test_condvals $term == "D"),]
+has_zero <- function(estimate, low, high){
+return(ifelse((low < 0),ifelse((high > 0), 1, 0), 2))
+}
+test_glmer_ranef_D <- test_glmer_ranef_D |>
+mutate(ranef_grouping = has_zero(estimate, conf.low, conf.high)) |>
+mutate(rank = rank(estimate))
+test_glmer_ranef_D |>
+ggplot(aes(x=rank, y=estimate, col = as.factor(ranef_grouping))) +
+geom_linerange(aes(ymin= conf.low, ymax= conf.high)) +
 theme_bw()
-wo_df_ranef |>
-ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) +
-geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) +
+View(test_glmer_ranef_D)
+View(test_condvals)
+all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data)
+summary(all_gmodel)
+test_condvals <- broom.mixed::tidy(all_gmodel, effects = "ran_vals", conf.int = TRUE)
+View(test_condvals)
+all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, family = Poisson)
+all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, family = poisson)
+summary(all_gmodel)
+all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D | upstream_vcs_link), data=all_actions_data, family = poisson)
+summary(all_gmodel)
+test_condvals <- broom.mixed::tidy(all_gmodel, effects = "ran_vals", conf.int = TRUE)
+test_glmer_ranef_D <- test_condvals [which(test_condvals $term == "D"),]
+has_zero <- function(estimate, low, high){
+return(ifelse((low < 0),ifelse((high > 0), 1, 0), 2))
+}
+test_glmer_ranef_D <- test_glmer_ranef_D |>
+mutate(ranef_grouping = has_zero(estimate, conf.low, conf.high)) |>
+mutate(rank = rank(estimate))
+test_glmer_ranef_D |>
+ggplot(aes(x=rank, y=estimate, col = as.factor(ranef_grouping))) +
+geom_linerange(aes(ymin= conf.low, ymax= conf.high)) +
 theme_bw()
-wo_df_ranef |>
-ggplot(aes(x=grp, y=condval, col = as.factor(ranef_grouping))) +
-geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) +
+all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, family = poisson)
+all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, nAGQ=0, family = poisson)
+summary(all_gmodel)
+test_condvals <- broom.mixed::tidy(all_gmodel, effects = "ran_vals", conf.int = TRUE)
+test_glmer_ranef_D <- test_condvals [which(test_condvals $term == "D"),]
+has_zero <- function(estimate, low, high){
+return(ifelse((low < 0),ifelse((high > 0), 1, 0), 2))
+}
+test_glmer_ranef_D <- test_glmer_ranef_D |>
+mutate(ranef_grouping = has_zero(estimate, conf.low, conf.high)) |>
+mutate(rank = rank(estimate))
+test_glmer_ranef_D |>
+ggplot(aes(x=rank, y=estimate, col = as.factor(ranef_grouping))) +
+geom_linerange(aes(ymin= conf.low, ymax= conf.high)) +
 theme_bw()
-wo_df_ranef <- wo_df_ranef |>
-arrange(condval)
-wo_df_ranef |>
-ggplot(aes(x=grp, y=condval, col = as.factor(ranef_grouping))) +
-geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) +
-theme_bw()
-View(wo_df_ranef)
-df_ranefs <- df_ranefs |>
-mutate(ranef_grouping = has_zero(condval, condsd))
-wo_df_ranef <- df_ranefs[which(df_ranefs$term == "week_offset"),]
-wo_df_ranef <- wo_df_ranef |>
-mutate(rank = rank(condval))
-library(ggplot2)
-wo_df_ranef |>
-ggplot(aes(x=grp, y=condval, col = as.factor(ranef_grouping))) +
-geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) +
-theme_bw()
-wo_df_ranef |>
-ggplot(aes(x=rank, y=condval, col = as.factor(ranef_grouping))) +
-geom_linerange(aes(ymin= condval - (1.96 * condsd), ymax= condval + (1.96 * condsd))) +
+variance(all_actions_data$log1p_count)
+var(all_actions_data$log1p_count)
+mean (all_actions_data$log1p_count)
+#all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, nAGQ=0, family = poisson)
+all_gmodel <- glmer.nb(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link),data=all_actions_data)
+#all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, nAGQ=0, family = poisson)
+all_gmodel <- glmer.nb(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link),
+control=glmerControl(optimizer="bobyqa",
+optCtrl=list(maxfun=2e5)), data=all_actions_data)
+#all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, nAGQ=0, family = poisson)
+all_gmodel <- glmer.nb(count ~ D * I(week_offset)+ scaled_project_age + (D | upstream_vcs_link),
+control=glmerControl(optimizer="bobyqa",
+optCtrl=list(maxfun=2e5)), data=all_actions_data)
+summary(all_gmodel)
+test_condvals <- broom.mixed::tidy(all_gmodel, effects = "ran_vals", conf.int = TRUE)
+test_glmer_ranef_D <- test_condvals [which(test_condvals $term == "D"),]
+has_zero <- function(estimate, low, high){
+return(ifelse((low < 0),ifelse((high > 0), 1, 0), 2))
+}
+test_glmer_ranef_D <- test_glmer_ranef_D |>
+mutate(ranef_grouping = has_zero(estimate, conf.low, conf.high)) |>
+mutate(rank = rank(estimate))
+test_glmer_ranef_D |>
+ggplot(aes(x=rank, y=estimate, col = as.factor(ranef_grouping))) +
+geom_linerange(aes(ymin= conf.low, ymax= conf.high)) +
 theme_bw()
+#all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, nAGQ=0, family = poisson)
+#all_gmodel <- glmer.nb(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset) | upstream_vcs_link),
+#                       control=glmerControl(optimizer="bobyqa",
+#                                            optCtrl=list(maxfun=2e5)), data=all_actions_data)
+all_gmodel <- glmer.nb(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset) | upstream_vcs_link), data=all_actions_data)
+#all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, nAGQ=0, family = poisson)
+#all_gmodel <- glmer.nb(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset) | upstream_vcs_link),
+#                       control=glmerControl(optimizer="bobyqa",
+#                                            optCtrl=list(maxfun=2e5)), data=all_actions_data)
+all_gmodel <- glmer.nb(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset) | upstream_vcs_link), data=all_actions_data, verbose=TRUE)
+#all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, nAGQ=0, family = poisson)
+#all_gmodel <- glmer.nb(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset) | upstream_vcs_link),
+#                       control=glmerControl(optimizer="bobyqa",
+#                                            optCtrl=list(maxfun=2e5)), data=all_actions_data)
+all_gmodel <- glmer.nb(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)  | upstream_vcs_link), data=all_actions_data)
+#all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, nAGQ=0, family = poisson)
+#all_gmodel <- glmer.nb(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset) | upstream_vcs_link),
+#                       control=glmerControl(optimizer="bobyqa",
+#                                            optCtrl=list(maxfun=2e5)), data=all_actions_data)
+all_gmodel <- glmer.nb(count ~ D * I(week_offset)+ scaled_project_age + (D | upstream_vcs_link), data=all_actions_data)
diff --git a/R/readmeRDDAnalysis.R b/R/readmeRDDAnalysis.R
index 777d7e5..605cdbe 100644
--- a/R/readmeRDDAnalysis.R
+++ b/R/readmeRDDAnalysis.R
@@ -63,18 +63,39 @@ all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D * I(
   optimizer ='optimx', optCtrl=list(method='L-BFGS-B')))
 summary_of_all <- summary(all_model)
 #identifying the quartiles of effect for D
+mmcm = coef(all_model)$upstream_vcs_link
+fixed_impacts = fixef(all_model)
+summary(all_model)$coef[,2]
+variance_components <- VarCorr(all_model)
 all_model_ranef_condvar <- ranef(all_model, condVar = TRUE)
-all_model_ranef <- ranef(all_model, condVar = FALSE)
+dotplot(all_model_ranef_condvar)
+test <- broom.mixed::tidy(all_model, effects = "ran_vals", conf.int = TRUE)
 attr(all_model_ranef_condvar$upstream_vcs_link, "postVar")
 
 all_coefficients <- coef(all_model)
 all_standard_errors <- sqrt(diag(vcov(all_model)))[1]
-#all_conf_intervals <- cbind(all_coefficients - 1.96 * all_standard_errors, 
-#                        all_coefficients + 1.96 * all_standard_errors)
 
-df_ranefs <- as.data.frame(all_model_ranef_condvar)
-df_rn_no_cv <- as.data.frame(all_model_ranef)
-D_df_ranef <- df_ranefs[which(df_ranefs$term == "D"),]
+var(all_actions_data$log1p_count) # 1.125429
+mean (all_actions_data$log1p_count) # 0.6426873
+
+#all_gmodel <- glmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, nAGQ=0, family = poisson)
+#all_gmodel <- glmer.nb(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset) | upstream_vcs_link),
+#                       control=glmerControl(optimizer="bobyqa",
+#                                            optCtrl=list(maxfun=2e5)), data=all_actions_data)
+all_gmodel <- glmer.nb(count ~ D * I(week_offset)+ scaled_project_age + (D | upstream_vcs_link), data=all_actions_data)
+summary(all_gmodel)
+test_condvals <- broom.mixed::tidy(all_gmodel, effects = "ran_vals", conf.int = TRUE)
+test_glmer_ranef_D <- test_condvals [which(test_condvals $term == "D"),]
+has_zero <- function(estimate, low, high){
+  return(ifelse((low < 0),ifelse((high > 0), 1, 0), 2))
+}
+test_glmer_ranef_D <- test_glmer_ranef_D |>
+  mutate(ranef_grouping = has_zero(estimate, conf.low, conf.high)) |>
+  mutate(rank = rank(estimate))
+test_glmer_ranef_D |> 
+  ggplot(aes(x=rank, y=estimate, col = as.factor(ranef_grouping))) +
+  geom_linerange(aes(ymin= conf.low, ymax= conf.high)) +
+  theme_bw()
 #below this groups the ranefs
 """
 has_zero <- function(condval, condsd){