From 00a1c5d1574a257b7a01cb8a8f9fca7cad6d86bf Mon Sep 17 00:00:00 2001
From: Matthew Gaughan
Date: Thu, 13 Jun 2024 13:40:27 -0500
Subject: [PATCH] updating EDA around outcome variables

---
 R/contribRDDAnalysis.R | 5 +++++
 R/readmeRDDAnalysis.R  | 3 +++
 2 files changed, 8 insertions(+)

diff --git a/R/contribRDDAnalysis.R b/R/contribRDDAnalysis.R
index 0aa4d1d..a22bfc9 100644
--- a/R/contribRDDAnalysis.R
+++ b/R/contribRDDAnalysis.R
@@ -43,6 +43,11 @@ mrg_actions_data <- windowed_data[which(windowed_data$observation_type == "mrg")
 #logging
 all_actions_data$logged_count <- log(all_actions_data$count)
 all_actions_data$log1p_count <- log1p(all_actions_data$count)
+#EDA
+range(all_actions_data$log1p_count) # 0.000000 6.745236
+mean(all_actions_data$log1p_count) # 1.200043
+var(all_actions_data$log1p_count) # 1.753764
+median(all_actions_data$log1p_count) # 0.6931472
 # now for merge
 mrg_actions_data$logged_count <- log(mrg_actions_data$count)
 mrg_actions_data$log1p_count <- log1p(mrg_actions_data$count)
diff --git a/R/readmeRDDAnalysis.R b/R/readmeRDDAnalysis.R
index 3a4d644..baa8db3 100644
--- a/R/readmeRDDAnalysis.R
+++ b/R/readmeRDDAnalysis.R
@@ -45,6 +45,7 @@ mrg_actions_data <- windowed_data[which(windowed_data$observation_type == "mrg")
 #log the dependent
 all_actions_data$logged_count <- log(all_actions_data$count)
 all_actions_data$log1p_count <- log1p(all_actions_data$count)
+range(all_actions_data$log1p_count)
 # 3 rdd in lmer analysis
 # rdd: https://rpubs.com/phle/r_tutorial_regression_discontinuity_design
 # lmer: https://www.youtube.com/watch?v=LzAwEKrn2Mc
@@ -55,8 +56,10 @@ library(lattice)
 #some more EDA to go between Poisson and neg binomial
 var(all_actions_data$log1p_count) # 1.125429
 mean (all_actions_data$log1p_count) # 0.6426873
+median(all_actions_data$log1p_count) #0
 var(all_actions_data$count) # 268.4449
 mean (all_actions_data$count) # 3.757298
+median(all_actions_data$count) # 0
 #all_log1p_gmodel <- glmer.nb(log1p_count ~ D * week_offset+ scaled_project_age + (D * week_offset | upstream_vcs_link), data=all_actions_data, nAGQ=1, control=glmerControl(optimizer="bobyqa",
 # optCtrl=list(maxfun=1e5)))
 all_log1p_gmodel <- readRDS("final_models/0510_rm_all.rda")