From 67946e6e109f667c765d4645cd7bb5964bccc620 Mon Sep 17 00:00:00 2001 From: mjgaughan Date: Tue, 23 Apr 2024 13:59:06 -0500 Subject: [PATCH] further specification of model and formula --- R/readmeRDDAnalysis.R | 46 +++++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/R/readmeRDDAnalysis.R b/R/readmeRDDAnalysis.R index a57e4e3..bde791e 100644 --- a/R/readmeRDDAnalysis.R +++ b/R/readmeRDDAnalysis.R @@ -34,10 +34,11 @@ for (i in 2:nrow(readme_df)){ #filter out the windows of time that we're looking at window_num <- 8 windowed_data <- expanded_data |> - filter(week >= (26 - window_num) & week <= (26 + window_num)) |> - mutate(D = ifelse(week > 26, 1, 0)) + filter(week >= (27 - window_num) & week <= (27 + window_num)) |> + mutate(D = ifelse(week > 27, 1, 0)) #scale the age numbers windowed_data$scaled_project_age <- scale(windowed_data$age_of_project) +windowed_data$week_offset <- windowed_data$week - 27 #separate out the cleaning d all_actions_data <- windowed_data[which(windowed_data$observation_type == "all"),] mrg_actions_data <- windowed_data[which(windowed_data$observation_type == "mrg"),] @@ -54,18 +55,37 @@ qqplot(all_actions_data$count, y) # lmer: https://www.youtube.com/watch?v=LzAwEKrn2Mc library(lme4) # https://www.bristol.ac.uk/cmm/learning/videos/random-intercepts.html#exvar -# (D |upstream_vcs_link) or (week | upstream_vcs_link) -poisson_all_model <- glmer(count ~ D + I(week - 26) + D:I(week - 26) + scaled_project_age + (week || upstream_vcs_link), data=all_actions_data, family = poisson(link = "log")) -summary(poisson_all_model) -poisson_residuals <- residuals(poisson_all_model) -qqnorm(poisson_residuals) +#making some random data +sampled_data <- readme_df[sample(nrow(readme_df), 220), ] +expanded_sample_data <- expand_timeseries(sampled_data[1,]) +for (i in 2:nrow(sampled_data)){ + expanded_sample_data <- rbind(expanded_sample_data, expand_timeseries(sampled_data[i,])) +} +windowed_sample_data <- expanded_sample_data |> + filter(week >= (27 - window_num) & week <= (27 + window_num)) |> + mutate(D = ifelse(week > 27, 1, 0)) +windowed_sample_data$scaled_project_age <- scale(windowed_sample_data$age_of_project) +windowed_sample_data$week_offset <- windowed_sample_data$week - 27 +all_actions_sample_data <- windowed_sample_data[which(windowed_sample_data$observation_type == "all"),] +#test model +test_model <- lmer(count ~ D * I(week_offset) + scaled_project_age + (D * I(week_offset)|upstream_vcs_link), data=all_actions_sample_data, REML=FALSE) +summary(test_model) +#plot results +p <- ggplot(all_actions_sample_data, aes(x=week_offset, y=count, color=upstream_vcs_link), show.legend = FALSE) + + geom_point(size=3, show.legend = FALSE) + + geom_line(aes(y=predict(test_model)), show.legend = FALSE) + + theme_bw() +p +##end of the model testing and plotting section +all_model <- lmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, REML=FALSE) +summary(all_model) +all_residuals <- residuals(all_model) +qqnorm(all_residuals) # for visualization, may have to run model for each project and then identify top 5 projects for RDD graphs -# -# -poisson_mrg_model <- glmer(count ~ D + I(week - 26) + D:I(week - 26) + scaled_project_age + (week |upstream_vcs_link), data=mrg_actions_data, family = poisson(link = "log")) -summary(poisson_mrg_model) -poisson_mrg_residuals <- residuals(poisson_mrg_model) -qqnorm(poisson_mrg_residuals) +mrg_model <- lmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=mrg_actions_data, REML=FALSE) +summary(mrg_model) +mrg_residuals <- residuals(mrg_model) +qqnorm(mrg_residuals) # Performance: library(merTools)