log1p the data
This commit is contained in:
parent
67946e6e10
commit
c97c24dd13
1020
R/.Rhistory
1020
R/.Rhistory
File diff suppressed because it is too large
Load Diff
@ -43,13 +43,15 @@ windowed_data$week_offset <- windowed_data$week - 27
|
|||||||
all_actions_data <- windowed_data[which(windowed_data$observation_type == "all"),]
|
all_actions_data <- windowed_data[which(windowed_data$observation_type == "all"),]
|
||||||
mrg_actions_data <- windowed_data[which(windowed_data$observation_type == "mrg"),]
|
mrg_actions_data <- windowed_data[which(windowed_data$observation_type == "mrg"),]
|
||||||
#find some EDA to identify which types of models might be the best for this
|
#find some EDA to identify which types of models might be the best for this
|
||||||
mean(all_actions_data$count)
|
# Histogram of the log1p-transformed counts, the same transform the lmer
# models in this script are fit on. log1p(0) is 0, whereas log(0) is -Inf and
# hist() silently discards non-finite values, hiding any zero counts.
hist(log1p(all_actions_data$count))
|
||||||
median(all_actions_data$count)
|
median(all_actions_data$count)
|
||||||
table(all_actions_data$count)
|
table(all_actions_data$count)
|
||||||
var(all_actions_data$count)
|
var(all_actions_data$count)
|
||||||
qqnorm(all_actions_data$count)
|
qqnorm(all_actions_data$count)
|
||||||
y <- qunif(ppoints(length(all_actions_data$count)))
|
y <- qunif(ppoints(length(all_actions_data$count)))
|
||||||
qqplot(all_actions_data$count, y)
|
qqplot(all_actions_data$count, y)
|
||||||
|
# Natural log of the count column. log(0) evaluates to -Inf, so any zero
# counts become non-finite here; the all_model fit uses log1p_count instead.
all_actions_data$logged_count <- log(all_actions_data$count)
|
||||||
|
# log(1 + count): finite for a count of 0 (gives 0). This column is the
# response variable of all_model.
all_actions_data$log1p_count <- log1p(all_actions_data$count)
|
||||||
# 3 rdd in lmer analysis
|
# 3 rdd in lmer analysis
|
||||||
# rdd: https://rpubs.com/phle/r_tutorial_regression_discontinuity_design
|
# rdd: https://rpubs.com/phle/r_tutorial_regression_discontinuity_design
|
||||||
# lmer: https://www.youtube.com/watch?v=LzAwEKrn2Mc
|
# lmer: https://www.youtube.com/watch?v=LzAwEKrn2Mc
|
||||||
@ -67,8 +69,9 @@ windowed_sample_data <- expanded_sample_data |>
|
|||||||
windowed_sample_data$scaled_project_age <- scale(windowed_sample_data$age_of_project)
|
windowed_sample_data$scaled_project_age <- scale(windowed_sample_data$age_of_project)
|
||||||
windowed_sample_data$week_offset <- windowed_sample_data$week - 27
|
windowed_sample_data$week_offset <- windowed_sample_data$week - 27
|
||||||
all_actions_sample_data <- windowed_sample_data[which(windowed_sample_data$observation_type == "all"),]
|
all_actions_sample_data <- windowed_sample_data[which(windowed_sample_data$observation_type == "all"),]
|
||||||
|
# log(1 + count) on the sample subset: finite for a count of 0 (gives 0).
# This column is the response variable of test_model.
all_actions_sample_data$log1p_count <- log1p(all_actions_sample_data$count)
|
||||||
#test model
|
#test model
|
||||||
test_model <- lmer(count ~ D * I(week_offset) + scaled_project_age + (D * I(week_offset)|upstream_vcs_link), data=all_actions_sample_data, REML=FALSE)
|
# Linear mixed model on all_actions_sample_data, fit by maximum likelihood
# (REML=FALSE) rather than REML.
#   Fixed effects:  D, week_offset, their interaction, scaled_project_age.
#   Random effects: intercept plus D, week_offset and D:week_offset slopes,
#                   varying by upstream_vcs_link.
# NOTE(review): D is presumably the post-cutoff indicator of the RDD - confirm.
# I() around the bare week_offset does not change the fit, only the term label.
test_model <- lmer(log1p_count ~ D * I(week_offset) + scaled_project_age + (D * I(week_offset)|upstream_vcs_link), data=all_actions_sample_data, REML=FALSE)
|
||||||
summary(test_model)
|
summary(test_model)
|
||||||
#plot results
|
#plot results
|
||||||
p <- ggplot(all_actions_sample_data, aes(x=week_offset, y=count, color=upstream_vcs_link), show.legend = FALSE) +
|
p <- ggplot(all_actions_sample_data, aes(x=week_offset, y=count, color=upstream_vcs_link), show.legend = FALSE) +
|
||||||
@ -77,7 +80,7 @@ p <- ggplot(all_actions_sample_data, aes(x=week_offset, y=count, color=upstream_
|
|||||||
theme_bw()
|
theme_bw()
|
||||||
p
|
p
|
||||||
##end of the model testing and plotting section
|
##end of the model testing and plotting section
|
||||||
all_model <- lmer(count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, REML=FALSE)
|
# Same specification as test_model, fit on the full all_actions_data by
# maximum likelihood (REML=FALSE).
#   Fixed effects:  D, week_offset, their interaction, scaled_project_age.
#   Random effects: intercept plus D, week_offset and D:week_offset slopes,
#                   varying by upstream_vcs_link.
# NOTE(review): D is presumably the post-cutoff indicator of the RDD - confirm.
all_model <- lmer(log1p_count ~ D * I(week_offset)+ scaled_project_age + (D * I(week_offset)| upstream_vcs_link), data=all_actions_data, REML=FALSE)
|
||||||
summary(all_model)
|
summary(all_model)
|
||||||
all_residuals <- residuals(all_model)
|
all_residuals <- residuals(all_model)
|
||||||
qqnorm(all_residuals)
|
qqnorm(all_residuals)
|
||||||
@ -87,7 +90,6 @@ summary(mrg_model)
|
|||||||
mrg_residuals <- residuals(mrg_model)
|
mrg_residuals <- residuals(mrg_model)
|
||||||
qqnorm(mrg_residuals)
|
qqnorm(mrg_residuals)
|
||||||
# Performance:
|
# Performance:
|
||||||
|
|
||||||
library(merTools)
|
library(merTools)
|
||||||
ICC(outcome="count", group="week", data=all_actions_data)
|
# Intraclass correlation of the raw count, clustered by week.
# NOTE(review): all_model is fit to log1p_count with upstream_vcs_link as the
# grouping factor, so this ICC describes a different outcome/grouping than the
# fitted model - confirm that is intended.
ICC(outcome="count", group="week", data=all_actions_data)
|
||||||
#testing for different types of models
|
#testing for different types of models
|
||||||
|
Loading…
Reference in New Issue
Block a user