diff --git a/R/.Rhistory b/R/.Rhistory index b1eb01c..a7eab43 100644 --- a/R/.Rhistory +++ b/R/.Rhistory @@ -1,62 +1,3 @@ -longer <- longer %>% -filter(week >= (26 - window_num) & week <= (26 + window_num)) -#testing out analysis below -longer[which(longer$observation_type == "all"),] |> -ggplot(aes(x = week, y = count)) + -geom_point() + -geom_vline(xintercept = 26) -longer[which(longer$observation_type == "all"),] |> -mutate(D = ifelse(week >= 26, 1, 0)) |> -lm(formula = count ~ D * I(week - 26)) |> -summary() -longer[which(longer$observation_type == "all"),] |> -select(count, week) |> -mutate(D = as.factor(ifelse(week >= 26, 1, 0))) |> -ggplot(aes(x = week, y = count, color = D)) + -geom_point() + -geom_smooth(se = FALSE) + -geom_vline(xintercept = 26) -window_num <- 4 -longer <- longer %>% -filter(week >= (26 - window_num) & week <= (26 + window_num)) -#testing out analysis below -longer[which(longer$observation_type == "all"),] |> -ggplot(aes(x = week, y = count)) + -geom_point() + -geom_vline(xintercept = 26) -longer[which(longer$observation_type == "all"),] |> -mutate(D = ifelse(week >= 26, 1, 0)) |> -lm(formula = count ~ D * I(week - 26)) |> -summary() -longer[which(longer$observation_type == "all"),] |> -select(count, week) |> -mutate(D = as.factor(ifelse(week >= 26, 1, 0))) |> -ggplot(aes(x = week, y = count, color = D)) + -geom_point() + -geom_smooth(se = FALSE) + -geom_vline(xintercept = 26) -window_num <- 10 -longer <- longer %>% -filter(week >= (26 - window_num) & week <= (26 + window_num)) -# test_two <- c() -# iterator <- 0 -# for (entry in test) { -# readme_df$cnt_before_all[iterator] <- as.numeric(unlist(entry)) -# print(as.numeric(unlist(entry))) -# iterator <- iterator + 1 -# } -# test_two -#Yes, need to expand the dataframe, but again, for the sake of clarity, do not want to until analysis step -# https://rpubs.com/phle/r_tutorial_regression_discontinuity_design -new_test <- readme_df[450,] -longer <- new_test |> -pivot_longer(cols = starts_with("ct"), -names_to = "window", -values_to = "count") |> -unnest(count) -longer$observation_type <- gsub("^.*_", "", longer$window) -longer <- ddply(longer, "observation_type", transform, week=seq(from=0, by=1, length.out=length(observation_type))) -longer$count <- as.numeric(longer$count) window_num <- 10 longer <- longer %>% filter(week >= (26 - window_num) & week <= (26 + window_num)) @@ -510,3 +451,62 @@ draft_model <- lmer(count ~ D * I(week - 26) + (1|as.factor(upstream_vcs_link)), draft_model <- lmer(count ~ D * I(week - 26) + (1|as.factor(upstream_vcs_link)), REML=FALSE, data=expanded_data[which(expanded_data$observation_type == "all"),]) draft_model <- lmer(count ~ D * I(week - 26) + (1|upstream_vcs_link), REML=FALSE, data=expanded_data[which(expanded_data$observation_type == "all"),]) summary(draft_model) +# this is the file with the lmer multi-level rddAnalysis +library(tidyverse) +library(plyr) +# 0 loading the readme data in +try(setwd(dirname(rstudioapi::getActiveDocumentContext()$path))) +readme_df <- read_csv("../final_data/deb_readme_did.csv") +# 1 preprocessing +colnames(readme_df) <- c("upstream_vcs_link", "event_date", "event_hash", "before_all_ct", "before_mrg_ct", "after_all_ct", "after_mrg_ct", "before_auth_new", "after_commit_new", "after_auth_new", "before_commit_new") +col_order <- c("upstream_vcs_link", "event_date", "event_hash", "before_all_ct", "after_all_ct", "before_mrg_ct", "after_mrg_ct", "before_auth_new", "after_auth_new", "before_commit_new", "after_commit_new") +readme_df <- readme_df[,col_order] +readme_df$ct_before_all <- str_split(gsub("[][]","", readme_df$before_all_ct), ", ") +readme_df$ct_after_all <- str_split(gsub("[][]","", readme_df$after_all_ct), ", ") +readme_df$ct_before_mrg <- str_split(gsub("[][]","", readme_df$before_mrg_ct), ", ") +readme_df$ct_after_mrg <- str_split(gsub("[][]","", readme_df$after_mrg_ct), ", ") +drop <- c("before_all_ct", "before_mrg_ct", "after_all_ct", "after_mrg_ct") +readme_df = readme_df[,!(names(readme_df) %in% drop)] +# 2 some expansion needs to happens for each project +expand_timeseries <- function(project_row) { +longer <- project_row |> +pivot_longer(cols = starts_with("ct"), +names_to = "window", +values_to = "count") |> +unnest(count) +longer$observation_type <- gsub("^.*_", "", longer$window) +longer <- ddply(longer, "observation_type", transform, week=seq(from=0, by=1, length.out=length(observation_type))) +longer$count <- as.numeric(longer$count) +#longer <- longer[which(longer$observation_type == "all"),] +return(longer) +} +expanded_data <- expand_timeseries(readme_df[1,]) +for (i in 2:nrow(readme_df)){ +expanded_data <- rbind(expanded_data, expand_timeseries(readme_df[i,])) +} +#filter out the timewindows +window_num <- 8 +expanded_data <- expanded_data |> +filter(week >= (26 - window_num) & week <= (26 + window_num)) |> +mutate(D = ifelse(week >= 26, 1, 0)) +# 3 rdd in lmer analysis +library(lme4) +draft_model <- lmer(count ~ D * I(week - 26) + (1|upstream_vcs_link), REML=FALSE, data=expanded_data[which(expanded_data$observation_type == "all"),]) +summary(draft_model) +expanded_data <- expanded_data |> +filter(week >= (26 - window_num) & week <= (26 + window_num)) |> +mutate(D = ifelse(week > 26, 1, 0)) +# 3 rdd in lmer analysis +# rdd: https://rpubs.com/phle/r_tutorial_regression_discontinuity_design +# lmer: https://www.youtube.com/watch?v=LzAwEKrn2Mc +library(lme4) +draft_model <- lmer(count ~ D * I(week - 26) + (1|upstream_vcs_link), REML=FALSE, data=expanded_data[which(expanded_data$observation_type == "all"),]) +summary(draft_model) +View(expanded_data) +draft_all_model <- lmer(count ~ D * I(week - 26) + (1|upstream_vcs_link), REML=FALSE, data=expanded_data[which(expanded_data$observation_type == "all"),]) +summary(draft_all_model) +draft_mrg_model <- lmer(count ~ D * I(week - 26) + (1|upstream_vcs_link), REML=FALSE, data=expanded_data[which(expanded_data$observation_type == "mrg"),]) +summary(draft_mrg_model) +draft_all_model <- lmer(count ~ D * I(week - 26) + (1|upstream_vcs_link), REML=TRUE, data=expanded_data[which(expanded_data$observation_type == "all"),]) +summary(draft_all_model) +summary(draft_all_model)