From 40a99532808ea628c412465340b329ec157e160e Mon Sep 17 00:00:00 2001
From: mjgaughan
Date: Fri, 19 Apr 2024 21:07:06 -0500
Subject: [PATCH] added poisson model

---
 R/didAnalysis.R | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/R/didAnalysis.R b/R/didAnalysis.R
index d712b14..f7386d0 100644
--- a/R/didAnalysis.R
+++ b/R/didAnalysis.R
@@ -38,19 +38,36 @@ expanded_data <- expanded_data |>
   mutate(D = ifelse(week > 26, 1, 0))
 #separate out the cleaning d
 all_actions_data <- expanded_data[which(expanded_data$observation_type == "all"),]
-mean(all_actions_data$count)
 mrg_actions_data <- expanded_data[which(expanded_data$observation_type == "mrg"),]
+# EDA on the outcome (mean, median, frequency table, variance, QQ plots) to identify which types of models might be the best for this
+mean(all_actions_data$count)
+median(all_actions_data$count)
+table(all_actions_data$count)
+var(all_actions_data$count)
+qqnorm(all_actions_data$count)
+y <- qunif(ppoints(length(all_actions_data$count)))
+qqplot(all_actions_data$count, y)
 # 3 rdd in lmer analysis
 # rdd: https://rpubs.com/phle/r_tutorial_regression_discontinuity_design
 # lmer: https://www.youtube.com/watch?v=LzAwEKrn2Mc
 library(lme4)
-draft_all_model <- lmer(count ~ D * I(week - 26) + age_of_project + (1 + D |upstream_vcs_link), REML=FALSE, data=all_actions_data)
-summary(draft_all_model)
+flat_all_model <- lm(count ~ D + I(week - 26) + D:I(week - 26) + age_of_project, data=all_actions_data)  # no REML here: it is an lmer argument, lm() would only warn and discard it
+summary(flat_all_model)
+lmer_all_model <- lmer(count ~ D + I(week - 26) + D:I(week - 26) + age_of_project + (1 + D |upstream_vcs_link), REML=FALSE, data=all_actions_data)
+summary(lmer_all_model)
+lmer_residuals <- residuals(lmer_all_model)
+qqnorm(lmer_residuals)
+# if I'm reading the residuals right, the poisson is better? glmer keeps its default nAGQ (Laplace): nAGQ > 1 is rejected for a vector-valued random effect such as (1 + D | upstream_vcs_link)
+poisson_all_model <- glmer(count ~ D + I(week - 26) + D:I(week - 26) + age_of_project + (1 + D |upstream_vcs_link), data=all_actions_data, family = poisson(link = "log"))
+summary(poisson_all_model)
+poisson_residuals <- residuals(poisson_all_model)
+qqnorm(poisson_residuals)
+# Performance:
 draft_mrg_model <- lmer(count ~ D * I(week - 26) + age_of_project + (1 + D |upstream_vcs_link), REML=FALSE, data=mrg_actions_data)
 summary(draft_mrg_model)
-# need to calculate inter-class correlation coefficient?
+# Performance:
+
 library(merTools)
 ICC(outcome="count", group="week", data=all_actions_data)
 #testing for different types of models
-# BIC
-library(flexmix)
+