updating EDA around outcome variables
This commit is contained in:
parent
b48a684185
commit
00a1c5d157
@ -43,6 +43,11 @@ mrg_actions_data <- windowed_data[which(windowed_data$observation_type == "mrg")
|
|||||||
#logging
|
#logging
|
||||||
all_actions_data$logged_count <- log(all_actions_data$count)
|
all_actions_data$logged_count <- log(all_actions_data$count)
|
||||||
all_actions_data$log1p_count <- log1p(all_actions_data$count)
|
all_actions_data$log1p_count <- log1p(all_actions_data$count)
|
||||||
|
#EDA
|
||||||
|
range(all_actions_data$log1p_count) # 0.000000 6.745236
|
||||||
|
mean(all_actions_data$log1p_count) # 1.200043
|
||||||
|
var(all_actions_data$log1p_count) # 1.753764
|
||||||
|
median(all_actions_data$log1p_count) # 0.6931472
|
||||||
# now for merge
|
# now for merge
|
||||||
mrg_actions_data$logged_count <- log(mrg_actions_data$count)
|
mrg_actions_data$logged_count <- log(mrg_actions_data$count)
|
||||||
mrg_actions_data$log1p_count <- log1p(mrg_actions_data$count)
|
mrg_actions_data$log1p_count <- log1p(mrg_actions_data$count)
|
||||||
|
@ -45,6 +45,7 @@ mrg_actions_data <- windowed_data[which(windowed_data$observation_type == "mrg")
|
|||||||
#log the dependent
|
#log the dependent
|
||||||
all_actions_data$logged_count <- log(all_actions_data$count)
|
all_actions_data$logged_count <- log(all_actions_data$count)
|
||||||
all_actions_data$log1p_count <- log1p(all_actions_data$count)
|
all_actions_data$log1p_count <- log1p(all_actions_data$count)
|
||||||
|
range(all_actions_data$log1p_count)
|
||||||
# 3 rdd in lmer analysis
|
# 3 rdd in lmer analysis
|
||||||
# rdd: https://rpubs.com/phle/r_tutorial_regression_discontinuity_design
|
# rdd: https://rpubs.com/phle/r_tutorial_regression_discontinuity_design
|
||||||
# lmer: https://www.youtube.com/watch?v=LzAwEKrn2Mc
|
# lmer: https://www.youtube.com/watch?v=LzAwEKrn2Mc
|
||||||
@ -55,8 +56,10 @@ library(lattice)
|
|||||||
#some more EDA to go between Poisson and neg binomial
|
#some more EDA to go between Poisson and neg binomial
|
||||||
var(all_actions_data$log1p_count) # 1.125429
|
var(all_actions_data$log1p_count) # 1.125429
|
||||||
mean (all_actions_data$log1p_count) # 0.6426873
|
mean (all_actions_data$log1p_count) # 0.6426873
|
||||||
|
median(all_actions_data$log1p_count) #0
|
||||||
var(all_actions_data$count) # 268.4449
|
var(all_actions_data$count) # 268.4449
|
||||||
mean (all_actions_data$count) # 3.757298
|
mean (all_actions_data$count) # 3.757298
|
||||||
|
median(all_actions_data$count) # 0
|
||||||
#all_log1p_gmodel <- glmer.nb(log1p_count ~ D * week_offset+ scaled_project_age + (D * week_offset | upstream_vcs_link), data=all_actions_data, nAGQ=1, control=glmerControl(optimizer="bobyqa",
|
#all_log1p_gmodel <- glmer.nb(log1p_count ~ D * week_offset+ scaled_project_age + (D * week_offset | upstream_vcs_link), data=all_actions_data, nAGQ=1, control=glmerControl(optimizer="bobyqa",
|
||||||
# optCtrl=list(maxfun=1e5)))
|
# optCtrl=list(maxfun=1e5)))
|
||||||
all_log1p_gmodel <- readRDS("final_models/0510_rm_all.rda")
|
all_log1p_gmodel <- readRDS("final_models/0510_rm_all.rda")
|
||||||
|
Loading…
Reference in New Issue
Block a user