# Exploratory difference-in-differences prep: load the README / CONTRIBUTING
# event tables, tidy the README table, and expand one project into long format.

# plyr is attached before tidyverse so that dplyr's verbs (mutate, etc.) mask
# plyr's versions of the same names.
library(plyr)
library(tidyverse)

# Set wd, read in data ----
# The CSV paths below are relative to this script's directory. The setwd()
# relies on rstudioapi, so outside RStudio try() reports the error and the
# current working directory is used as-is.
try(setwd(dirname(rstudioapi::getActiveDocumentContext()$path)))
readme_df <- read_csv("../final_data/deb_readme_did.csv")
contributing_df <- read_csv("../final_data/deb_contrib_did.csv")

# Preprocessing for readme_df ----
# Columns are renamed purely by position, so this vector must follow the
# column order of the CSV.
# NOTE(review): the order used here (e.g. "after_commit_new" ahead of
# "after_auth_new") is assumed to match the exported file -- confirm.
colnames(readme_df) <- c(
  "upstream_vcs_link", "event_date", "event_hash",
  "before_all_ct", "before_mrg_ct", "after_all_ct", "after_mrg_ct",
  "before_auth_new", "after_commit_new", "after_auth_new", "before_commit_new"
)
# Reorder so that each before/after pair sits side by side.
col_order <- c(
  "upstream_vcs_link", "event_date", "event_hash",
  "before_all_ct", "after_all_ct", "before_mrg_ct", "after_mrg_ct",
  "before_auth_new", "after_auth_new", "before_commit_new", "after_commit_new"
)
readme_df <- readme_df[, col_order]

# Strip the square brackets from each text entry and split it on ", ".
# Returns a list with one character vector per input element; the pieces are
# left as strings here and only converted to numeric at the analysis step.
parse_count_list <- function(x) {
  str_split(gsub("[][]", "", x), ", ")
}

readme_df$ct_before_all <- parse_count_list(readme_df$before_all_ct)
readme_df$ct_after_all <- parse_count_list(readme_df$after_all_ct)
readme_df$ct_before_mrg <- parse_count_list(readme_df$before_mrg_ct)
readme_df$ct_after_mrg <- parse_count_list(readme_df$after_mrg_ct)

# The raw text columns are superseded by the parsed ct_* list columns.
drop <- c("before_all_ct", "before_mrg_ct", "after_all_ct", "after_mrg_ct")
readme_df <- readme_df[, !(names(readme_df) %in% drop)]

# Preprocessing for contributing_df ----
# (nothing is done to contributing_df yet; the lines below are disabled
# scratch code kept from earlier experimentation)
# test <- readme_df$cnt_before_all
# as.numeric(unlist(test[1]))
# test_two <- c()
# iterator <- 0
# for (entry in test) {
#   readme_df$cnt_before_all[iterator] <- as.numeric(unlist(entry))
#   print(as.numeric(unlist(entry)))
#   iterator <- iterator + 1
# }
# test_two

# Yes, the data frame needs to be expanded, but for the sake of clarity that
# is deferred until the analysis step. For now a single row is expanded.
example_row <- 231
new_test <- readme_df[example_row, ]

# One output row per parsed entry: the four ct_* list columns are stacked
# (in column order) and then unnested.
longer <- new_test |>
  pivot_longer(
    cols = starts_with("ct"),
    names_to = "window",
    values_to = "count"
  ) |>
  unnest(count)

# Keep only the text after the last underscore of the window name
# ("ct_before_all" -> "all", "ct_after_mrg" -> "mrg").
longer$observation_type <- gsub("^.*_", "", longer$window)

# Number the rows 0, 1, 2, ... within each observation_type. Because the
# before-window rows precede the after-window rows, the index runs
# continuously across both windows.
longer <- ddply(
  longer, "observation_type", transform,
  week = seq(from = 0, by = 1, length.out = length(observation_type))
)
longer$count <- as.numeric(longer$count)
# sapply(longer, class)
# Testing out analysis below ----
# Before/after comparison on the "all" series of the expanded example row.

# Rows with week >= cutoff_week are treated as post-event (D = 1).
# NOTE(review): 26 presumably equals the number of entries in the "before"
# window -- confirm against the data export.
cutoff_week <- 26

# The same subset feeds all three steps below, so it is computed once.
all_obs <- longer[which(longer$observation_type == "all"), ]

# Raw counts over time, with the cutoff marked by a vertical line.
all_obs |>
  ggplot(aes(x = week, y = count)) +
  geom_point() +
  geom_vline(xintercept = cutoff_week)

# Linear model with a post-event indicator, a week term centred on the cutoff,
# and their interaction. The piped data frame is matched to lm()'s `data`
# argument because `formula` is passed by name.
all_obs |>
  mutate(D = ifelse(week >= cutoff_week, 1, 0)) |>
  lm(formula = count ~ D * I(week - cutoff_week)) |>
  summary()

# Same points coloured by the pre/post indicator, with a separate smooth for
# each side (D is a factor, so the colour aesthetic also defines the groups).
all_obs |>
  select(count, week) |>
  mutate(D = as.factor(ifelse(week >= cutoff_week, 1, 0))) |>
  ggplot(aes(x = week, y = count, color = D)) +
  geom_point() +
  geom_smooth(se = FALSE)