pivot_longer(cols = starts_with("cnt"), names_to = "window", values_to = "count") |> unnest(count))
# NOTE(review): the line above is the tail of a statement that begins before
# this chunk of the session history; it is kept verbatim so that statement is
# not altered. Everything below is the consolidated, working form of the
# interactive session that followed (abandoned attempts removed).

# README event analysis ----
#
# Reads the per-event count data, reshapes one event's bracketed count lists
# into a long table with a running `week` index, and then plots / models the
# counts on either side of a cutoff week.

# plyr is attached BEFORE tidyverse so dplyr's verbs are not masked by plyr's
# same-named functions (mutate, summarise, ...).
library(plyr)
library(tidyverse)

# Set wd, read in data ----
# The relative paths below depend on the working directory being this
# script's folder. rstudioapi only works inside RStudio, hence the try().
try(setwd(dirname(rstudioapi::getActiveDocumentContext()$path)))
readme_df <- read_csv("../final_data/deb_readme_did.csv")
contributing_df <- read_csv("../final_data/deb_contrib_did.csv")

# Preprocessing for readme_df ----
# NOTE(review): the names are assigned positionally, so this assumes the CSV
# columns arrive in exactly this order -- confirm against the file.
colnames(readme_df) <- c(
  "upstream_vcs_link", "event_date", "event_hash",
  "before_all_ct", "before_mrg_ct", "after_all_ct", "after_mrg_ct",
  "before_auth_new", "after_commit_new", "after_auth_new",
  "before_commit_new"
)
col_order <- c(
  "upstream_vcs_link", "event_date", "event_hash",
  "before_all_ct", "after_all_ct", "before_mrg_ct", "after_mrg_ct",
  "before_auth_new", "after_auth_new", "before_commit_new",
  "after_commit_new"
)
readme_df <- readme_df[, col_order]

# Each *_ct column holds a bracketed, comma-separated string. Strip the
# square brackets and split on ", " to get a list-column of character
# vectors (converted to numeric after unnesting, below).
readme_df$ct_before_all <- str_split(
  gsub("[][]", "", readme_df$before_all_ct), ", "
)
readme_df$ct_after_all <- str_split(
  gsub("[][]", "", readme_df$after_all_ct), ", "
)
readme_df$ct_before_mrg <- str_split(
  gsub("[][]", "", readme_df$before_mrg_ct), ", "
)
readme_df$ct_after_mrg <- str_split(
  gsub("[][]", "", readme_df$after_mrg_ct), ", "
)

# Drop the raw string columns now that the list-columns exist.
drop <- c("before_all_ct", "before_mrg_ct", "after_all_ct", "after_mrg_ct")
readme_df <- readme_df[, !(names(readme_df) %in% drop)]

# Earlier, abandoned attempt at converting the list entries in place:
# as.numeric(unlist(test[1]))
# test_two <- c()
# iterator <- 0
# for (entry in test) {
#   readme_df$cnt_before_all[iterator] <- as.numeric(unlist(entry))
#   print(as.numeric(unlist(entry)))
#   iterator <- iterator + 1
# }
# test_two

# Expand a single event ----
# Yes, the data frame needs to be expanded, but for the sake of clarity this
# is deferred until the analysis step and done for one event at a time.
# The session inspected rows 1, 5, 76, 77, 143, 185 and 231 in turn; 231 was
# the last one examined.
event_row <- 231
new_test <- readme_df[event_row, ]

longer <- new_test |>
  pivot_longer(
    cols = starts_with("ct"),
    names_to = "window",
    values_to = "count"
  ) |>
  unnest(count)

# "ct_before_all" -> "all", "ct_after_mrg" -> "mrg": keep only the suffix.
longer$observation_type <- gsub("^.*_", "", longer$window)

# Running index within each observation type, starting at 0. Rows keep the
# pivot order, so for each type the "before" values precede the "after" ones.
longer <- ddply(
  longer, "observation_type", transform,
  week = seq(from = 0, by = 1, length.out = length(observation_type))
)

# str_split() produced character values; convert before plotting/modelling.
longer$count <- as.numeric(longer$count)
print(sapply(longer, class))

# Testing out analysis below ----
# NOTE(review): 26 is presumably the number of weekly observations in the
# "before" window, making week 26 the first post-event week -- confirm.
cutoff_week <- 26

all_obs <- longer[which(longer$observation_type == "all"), ]

# Raw counts over time with the cutoff marked.
print(
  ggplot(all_obs, aes(x = week, y = count)) +
    geom_point() +
    geom_vline(xintercept = cutoff_week)
)

# Linear model: post-cutoff indicator D interacted with week centred on the
# cutoff. dplyr:: is spelled out so the call is immune to plyr masking.
model_df <- all_obs |>
  dplyr::mutate(D = ifelse(week >= cutoff_week, 1, 0))
print(summary(lm(formula = count ~ D * I(week - cutoff_week), data = model_df)))

# Same data, coloured by side of the cutoff, with a smoother per side.
print(
  all_obs |>
    dplyr::select(count, week) |>
    dplyr::mutate(D = as.factor(ifelse(week >= cutoff_week, 1, 0))) |>
    ggplot(aes(x = week, y = count, color = D)) +
    geom_point() +
    geom_smooth(se = FALSE)
)