log1p the data
This commit is contained in:
parent
67946e6e10
commit
c97c24dd13
1020
R/.Rhistory
1020
R/.Rhistory
File diff suppressed because it is too large
Load Diff
@ -43,13 +43,15 @@ windowed_data$week_offset <- windowed_data$week - 27
|
||||
# Keep only the rows whose observation_type is "all". %in% never yields NA,
# so rows with a missing observation_type are dropped, just as with which().
all_actions_data <- windowed_data[windowed_data$observation_type %in% "all", ]
|
||||
# Keep only the rows whose observation_type is "mrg". %in% never yields NA,
# so rows with a missing observation_type are dropped, just as with which().
mrg_actions_data <- windowed_data[windowed_data$observation_type %in% "mrg", ]
|
||||
# Exploratory data analysis (EDA) to identify which types of models might be the best fit for this data
|
||||
# Mean of the count column.
mean(all_actions_data[["count"]])
|
||||
# Histogram of the log-transformed counts. log1p() is used so that a count of
# 0 maps to 0; with log() it becomes -Inf, and hist() discards non-finite
# values, so zero-count rows would silently be left out of the plot.
hist(log1p(all_actions_data$count))
|
||||
# Median of the count column.
median(all_actions_data[["count"]])
|
||||
# Frequency of each distinct count value.
table(all_actions_data[["count"]])
|
||||
# Sample variance of the count column.
var(all_actions_data[["count"]])
|
||||
# Normal Q-Q plot of the raw counts.
qqnorm(y = all_actions_data$count)
|
||||
# Quantiles of the uniform distribution on [0, 1] (qunif defaults), one
# probability point per row of all_actions_data.
y <- qunif(ppoints(nrow(all_actions_data)))
|
||||
# Q-Q plot of the counts against the uniform quantiles stored in y.
qqplot(x = all_actions_data$count, y = y)
|
||||
# Natural log of the counts. Note that log() returns -Inf for a count of 0.
all_actions_data[["logged_count"]] <- log(all_actions_data[["count"]])
|
||||
# log(1 + count); unlike log(), this stays finite (0) when the count is 0.
all_actions_data[["log1p_count"]] <- log1p(all_actions_data[["count"]])
|
||||
# 3: RDD (regression discontinuity design) analysis with lmer
|
||||
# rdd: https://rpubs.com/phle/r_tutorial_regression_discontinuity_design
|
||||
# lmer: https://www.youtube.com/watch?v=LzAwEKrn2Mc
|
||||
@ -67,8 +69,9 @@ windowed_sample_data <- expanded_sample_data |>
|
||||
# Standardise project age with scale() using its default centring and scaling.
# NOTE(review): scale() returns a matrix, so this column is a one-column matrix
# rather than a plain numeric vector; confirm downstream code is fine with that.
windowed_sample_data$scaled_project_age <- scale(
  windowed_sample_data[["age_of_project"]]
)
|
||||
# Re-centre week so that week 27 becomes offset 0.
windowed_sample_data[["week_offset"]] <- windowed_sample_data[["week"]] - 27
|
||||
# Keep only the sample rows whose observation_type is "all". %in% never yields
# NA, so rows with a missing observation_type are dropped, as with which().
all_actions_sample_data <- windowed_sample_data[
  windowed_sample_data$observation_type %in% "all",
]
|
||||
# log(1 + count) for the sample data; finite (0) when the count is 0.
all_actions_sample_data[["log1p_count"]] <- log1p(all_actions_sample_data[["count"]])
|
||||
#test model
|
||||
# Linear mixed model on the sample data with raw count as the outcome, fitted
# by maximum likelihood (REML = FALSE). The D * week_offset interaction appears
# both as a fixed effect and as a random effect per upstream_vcs_link.
# NOTE(review): test_model is assigned again further down with log1p_count as
# the outcome, so this fit gets overwritten.
test_model <- lmer(
  count ~ D * I(week_offset) + scaled_project_age +
    (D * I(week_offset) | upstream_vcs_link),
  data = all_actions_sample_data,
  REML = FALSE
)
|
||||
# Linear mixed model on the sample data with log1p_count as the outcome, fitted
# by maximum likelihood (REML = FALSE). The D * week_offset interaction appears
# both as a fixed effect and as a random effect per upstream_vcs_link.
# NOTE(review): this replaces the test_model fitted just above on raw count.
test_model <- lmer(
  log1p_count ~ D * I(week_offset) + scaled_project_age +
    (D * I(week_offset) | upstream_vcs_link),
  data = all_actions_sample_data,
  REML = FALSE
)
|
||||
# Summarise the fitted test model.
summary(test_model)
|
||||
#plot results
|
||||
p <- ggplot(all_actions_sample_data, aes(x=week_offset, y=count, color=upstream_vcs_link), show.legend = FALSE) +
|
||||
@ -77,7 +80,7 @@ p <- ggplot(all_actions_sample_data, aes(x=week_offset, y=count, color=upstream_
|
||||
theme_bw()
|
||||
# Evaluate p so that the ggplot object is printed when run at top level.
p
|
||||
##end of the model testing and plotting section
|
||||
# Linear mixed model on the full "all" data with raw count as the outcome,
# fitted by maximum likelihood (REML = FALSE).
# NOTE(review): all_model is assigned again further down with log1p_count as
# the outcome, so this fit gets overwritten.
all_model <- lmer(
  count ~ D * I(week_offset) + scaled_project_age +
    (D * I(week_offset) | upstream_vcs_link),
  data = all_actions_data,
  REML = FALSE
)
|
||||
# Linear mixed model on the full "all" data with log1p_count as the outcome,
# fitted by maximum likelihood (REML = FALSE).
# NOTE(review): this replaces the all_model fitted just above on raw count.
all_model <- lmer(
  log1p_count ~ D * I(week_offset) + scaled_project_age +
    (D * I(week_offset) | upstream_vcs_link),
  data = all_actions_data,
  REML = FALSE
)
|
||||
# Summarise the fitted model for the "all" observations.
summary(all_model)
|
||||
# Residuals of the "all" model (resid() dispatches to the residuals method).
all_residuals <- resid(all_model)
|
||||
# Normal Q-Q plot of the "all" model residuals.
qqnorm(y = all_residuals)
|
||||
@ -87,7 +90,6 @@ summary(mrg_model)
|
||||
# Residuals of the "mrg" model (resid() dispatches to the residuals method).
mrg_residuals <- resid(mrg_model)
|
||||
# Normal Q-Q plot of the "mrg" model residuals.
qqnorm(y = mrg_residuals)
|
||||
# Performance:
|
||||
|
||||
# Attach merTools; presumably needed for the ICC() call that follows.
library(merTools)
|
||||
# Intraclass correlation of count, grouping observations by week.
# NOTE(review): the models above use log1p_count as the outcome and
# upstream_vcs_link as the grouping factor; confirm that count/week is the
# intended pair here.
ICC(outcome="count", group="week", data=all_actions_data)
|
||||
#testing for different types of models
|
||||
|
Loading…
Reference in New Issue
Block a user