diff --git a/R/didAnalysis.R b/R/didAnalysis.R
index 8d27f62..d712b14 100644
--- a/R/didAnalysis.R
+++ b/R/didAnalysis.R
@@ -5,8 +5,8 @@ library(plyr)
 try(setwd(dirname(rstudioapi::getActiveDocumentContext()$path)))
 readme_df <- read_csv("../final_data/deb_readme_did.csv")
 # 1 preprocessing
-colnames(readme_df) <- c("upstream_vcs_link", "event_date", "event_hash", "before_all_ct", "before_mrg_ct", "after_all_ct", "after_mrg_ct", "before_auth_new", "after_commit_new", "after_auth_new", "before_commit_new")
-col_order <- c("upstream_vcs_link", "event_date", "event_hash", "before_all_ct", "after_all_ct", "before_mrg_ct", "after_mrg_ct", "before_auth_new", "after_auth_new", "before_commit_new", "after_commit_new")
+#colnames(readme_df) <- c("upstream_vcs_link", "event_date", "event_hash", "before_all_ct", "before_mrg_ct", "after_all_ct", "after_mrg_ct", "before_auth_new", "after_commit_new", "after_auth_new", "before_commit_new")
+col_order <- c("upstream_vcs_link", "age_of_project", "event_date", "event_hash", "before_all_ct", "after_all_ct", "before_mrg_ct", "after_mrg_ct", "before_auth_new", "after_auth_new", "before_commit_new", "after_commit_new")
 readme_df <- readme_df[,col_order]
 readme_df$ct_before_all <- str_split(gsub("[][]","", readme_df$before_all_ct), ", ")
 readme_df$ct_after_all <- str_split(gsub("[][]","", readme_df$after_all_ct), ", ")
@@ -36,19 +36,21 @@ window_num <- 8
 expanded_data <- expanded_data |>
   filter(week >= (26 - window_num) & week <= (26 + window_num)) |>
   mutate(D = ifelse(week > 26, 1, 0))
-#separate out the cleaning
+#separate out the cleaning d
 all_actions_data <- expanded_data[which(expanded_data$observation_type == "all"),]
+mean(all_actions_data$count)
 mrg_actions_data <- expanded_data[which(expanded_data$observation_type == "mrg"),]
 # 3 rdd in lmer analysis
 # rdd: https://rpubs.com/phle/r_tutorial_regression_discontinuity_design
 # lmer: https://www.youtube.com/watch?v=LzAwEKrn2Mc
 library(lme4)
-draft_all_model <- lmer(count ~ D * I(week - 26) + (1|upstream_vcs_link), REML=FALSE, data=all_actions_data)
+draft_all_model <- lmer(count ~ D * I(week - 26) + age_of_project + (1 + D |upstream_vcs_link), REML=FALSE, data=all_actions_data)
 summary(draft_all_model)
-draft_mrg_model <- lmer(count ~ D * I(week - 26) + (1|upstream_vcs_link), REML=FALSE, data=mrg_actions_data)
+draft_mrg_model <- lmer(count ~ D * I(week - 26) + age_of_project + (1 + D |upstream_vcs_link), REML=FALSE, data=mrg_actions_data)
 summary(draft_mrg_model)
 # need to calculate inter-class correlation coefficient?
 library(merTools)
-ICC(outcome="count", group="upstream_vcs_link", data=all_actions_data)
+ICC(outcome="count", group="week", data=all_actions_data)
 #testing for different types of models
-# anova
\ No newline at end of file
+# BIC
+library(flexmix)
diff --git a/final_data/.~lock.deb_full_data.csv# b/final_data/.~lock.deb_full_data.csv#
deleted file mode 100644
index 1b95756..0000000
--- a/final_data/.~lock.deb_full_data.csv#
+++ /dev/null
@@ -1 +0,0 @@
-Matt Gaughan,mgone,dhcp-10-105-58-199.wireless.northwestern.private,19.04.2024 14:27,file:///Users/mgone/Library/Application%20Support/OpenOffice/4;
\ No newline at end of file