added poisson model

This commit is contained in:
mjgaughan 2024-04-19 21:07:06 -05:00
parent 1dc38fbad9
commit 40a9953280

View File

@ -38,19 +38,36 @@ expanded_data <- expanded_data |>
mutate(D = ifelse(week > 26, 1, 0)) mutate(D = ifelse(week > 26, 1, 0))
#separate out the cleaning d #separate out the cleaning d
all_actions_data <- expanded_data[which(expanded_data$observation_type == "all"),] all_actions_data <- expanded_data[which(expanded_data$observation_type == "all"),]
mean(all_actions_data$count)
mrg_actions_data <- expanded_data[which(expanded_data$observation_type == "mrg"),] mrg_actions_data <- expanded_data[which(expanded_data$observation_type == "mrg"),]
# Exploratory look at the "all" action counts, used to judge which model
# family is a sensible choice before any model is fitted.
# Location and spread of the counts; each value is printed in turn.
mean(all_actions_data[["count"]])
median(all_actions_data[["count"]])
table(all_actions_data[["count"]])
var(all_actions_data[["count"]])
# Normal Q-Q plot of the raw counts.
qqnorm(all_actions_data[["count"]])
# Reference quantiles from a standard uniform, one per observation.
y <- qunif(ppoints(nrow(all_actions_data)))
# The call is written with these exact argument expressions because qqplot()
# derives its default axis labels from them.
qqplot(all_actions_data$count, y)
# 3 rdd in lmer analysis # 3 rdd in lmer analysis
# rdd: https://rpubs.com/phle/r_tutorial_regression_discontinuity_design # rdd: https://rpubs.com/phle/r_tutorial_regression_discontinuity_design
# lmer: https://www.youtube.com/watch?v=LzAwEKrn2Mc # lmer: https://www.youtube.com/watch?v=LzAwEKrn2Mc
library(lme4) library(lme4)
draft_all_model <- lmer(count ~ D * I(week - 26) + age_of_project + (1 + D |upstream_vcs_link), REML=FALSE, data=all_actions_data) flat_all_model <- lm(count ~ D + I(week - 26) + D:I(week - 26) + age_of_project, REML=FALSE, data=all_actions_data)
summary(draft_all_model) summary(flat_all_model)
# Linear mixed model for the "all" counts: discontinuity indicator D, week
# centred on the cutoff (week 26), their interaction, and project age, with a
# random intercept and random D slope per upstream_vcs_link. Fitted by maximum
# likelihood (REML = FALSE).
lmer_all_model <- lmer(
  count ~ D + I(week - 26) + D:I(week - 26) + age_of_project +
    (1 + D | upstream_vcs_link),
  REML = FALSE,
  data = all_actions_data
)
summary(lmer_all_model)
lmer_residuals <- residuals(lmer_all_model)
qqnorm(lmer_residuals)

# Judging from the residual Q-Q plots, the Poisson model may be the better fit.
# NOTE(review): residuals from a Poisson GLMM are not expected to be exactly
# normal, so treat this Q-Q comparison as a rough guide only.
#
# Same fixed and random effects as above, with a Poisson response and log link.
# nAGQ is left at its default (1, the Laplace approximation): lme4 supports
# nAGQ > 1 only for models with a single scalar random-effects term, and
# (1 + D | upstream_vcs_link) has both an intercept and a slope, so the
# previous nAGQ = 100 made glmer() stop with an error.
poisson_all_model <- glmer(
  count ~ D + I(week - 26) + D:I(week - 26) + age_of_project +
    (1 + D | upstream_vcs_link),
  data = all_actions_data,
  family = poisson(link = "log")
)
summary(poisson_all_model)
poisson_residuals <- residuals(poisson_all_model)
qqnorm(poisson_residuals)
# Performance:
draft_mrg_model <- lmer(count ~ D * I(week - 26) + age_of_project + (1 + D |upstream_vcs_link), REML=FALSE, data=mrg_actions_data) draft_mrg_model <- lmer(count ~ D * I(week - 26) + age_of_project + (1 + D |upstream_vcs_link), REML=FALSE, data=mrg_actions_data)
summary(draft_mrg_model) summary(draft_mrg_model)
# need to calculate inter-class correlation coefficient? # Performance:
library(merTools) library(merTools)
ICC(outcome="count", group="week", data=all_actions_data) ICC(outcome="count", group="week", data=all_actions_data)
#testing for different types of models #testing for different types of models
# BIC
library(flexmix)