diff --git a/R/.RData b/R/.RData
index 6d72e29..d17abb1 100644
Binary files a/R/.RData and b/R/.RData differ
diff --git a/R/didAnalyses.R b/R/didAnalyses.R
deleted file mode 100644
index fa41543..0000000
--- a/R/didAnalyses.R
+++ /dev/null
@@ -1,4 +0,0 @@
-library(tidyverse)
-
-#set wd
-try(setwd(dirname(rstudioapi::getActiveDocumentContext()$path)))
diff --git a/R/didCleaning.R b/R/didCleaning.R
new file mode 100644
index 0000000..b9e5d15
--- /dev/null
+++ b/R/didCleaning.R
@@ -0,0 +1,39 @@
+library(tidyverse)
+
+# set wd to this script's folder via rstudioapi (try() lets the script continue if that fails), then read in data
+try(setwd(dirname(rstudioapi::getActiveDocumentContext()$path)))
+readme_df <- read_csv("../final_data/deb_readme_did.csv")
+contributing_df <- read_csv("../final_data/deb_contrib_did.csv")
+
+# preprocessing for readme_df: rename columns by position, reorder, then strip "[" / "]" and split each *_ct string on ", "
+colnames(readme_df) <- c("upstream_vcs_link", "event_date", "event_hash", "before_all_ct", "before_mrg_ct", "after_all_ct", "after_mrg_ct", "before_auth_new", "after_commit_new", "after_auth_new", "before_commit_new")
+col_order <- c("upstream_vcs_link", "event_date", "event_hash", "before_all_ct", "after_all_ct", "before_mrg_ct", "after_mrg_ct", "before_auth_new", "after_auth_new", "before_commit_new", "after_commit_new")
+readme_df <- readme_df[, col_order]
+readme_df$ct_before_all <- str_split(gsub("[][]","", readme_df$before_all_ct), ", ")
+readme_df$ct_after_all <- str_split(gsub("[][]","", readme_df$after_all_ct), ", ")
+readme_df$ct_before_mrg <- str_split(gsub("[][]","", readme_df$before_mrg_ct), ", ")
+readme_df$ct_after_mrg <- str_split(gsub("[][]","", readme_df$after_mrg_ct), ", ")
+drop <- c("before_all_ct", "before_mrg_ct", "after_all_ct", "after_mrg_ct")
+readme_df <- readme_df[, !(names(readme_df) %in% drop)]
+
+# preprocessing for contributing_df
+# TODO: not implemented yet -- contributing_df is read in above but left as-is
+
+# test <- readme_df$cnt_before_all
+# as.numeric(unlist(test[1]))
+# test_two <- c()
+# iterator <- 0
+# for (entry in test) {
+#   readme_df$cnt_before_all[iterator] <- as.numeric(unlist(entry))
+#   print(as.numeric(unlist(entry)))
+#   iterator <- iterator + 1
+# }
+# test_two
+# Yes, the data frame needs expanding (one row per list element), but for clarity that waits until the analysis step; below is a trial on the first row only
+new_test <- head(readme_df, 1)
+longer <- new_test |>
+  pivot_longer(cols = starts_with("ct"),
+               names_to = "window",
+               values_to = "count") |>
+  unnest(count)
+longer