# commit_activity_analysis/get_weekly_commit_counts.R
#
# Turns CSV files of individual commits into CSV files of weekly commit
# counts, one output file per input file.

library(tidyverse)
# test data directory: /gscratch/comdata/users/mjilg/program_testing/
# load in the partitioned directories
library(dplyr)
library(lubridate)

# for a given file we want to get the count data and produce a csv
test_file <- "/gscratch/comdata/users/mjilg/program_testing/core_2012-01-01_to_2014-12-31.csv"
test_dir <- "/gscratch/comdata/users/mjilg/program_testing/"

# Transform one CSV of commit rows into weekly commit counts.
#
# Arguments:
#   filepath  Path to a CSV file with a header row and a `commit_date`
#             column that `lubridate::ymd_hms()` can parse. The file name
#             is expected to look like
#             `<project>_YYYY-MM-DD_to_YYYY-MM-DD.csv`; the `<project>`
#             part becomes the `project_name` column.
#
# Side effects:
#   Writes `weekly_counts/weeklycount_<file name>` next to the input file,
#   creating the `weekly_counts` directory when it does not exist yet.
#
# Returns:
#   The weekly counts, with columns `week`, `project_name` and
#   `commit_count`.
transform_commit_data <- function(filepath) {
  df <- read.csv(filepath, header = TRUE)
  if (!("commit_date" %in% names(df))) {
    stop("No 'commit_date' column found in ", filepath, call. = FALSE)
  }

  dir_path <- dirname(filepath)
  file_name <- basename(filepath)

  # Strip the "_<start>_to_<end>.csv" suffix to recover the project name.
  # A file name that does not match the pattern is used unchanged.
  project_name <- sub(
    "_[0-9]{4}-[0-9]{2}-[0-9]{2}_to_[0-9]{4}-[0-9]{2}-[0-9]{2}\\.csv$",
    "",
    file_name
  )

  # transform the rows of commit data to weekly count data
  # NOTE(review): timestamps that ymd_hms() cannot parse become NA and are
  # counted together under an NA week -- confirm this is acceptable.
  weekly_commits <- df |>
    mutate(
      commit_date = ymd_hms(commit_date),
      project_name = project_name,
      week = floor_date(commit_date, "week")
    ) |>
    group_by(week, project_name) |>
    summarise(commit_count = n(), .groups = "drop")

  # prepare to save the new, transformed file
  count_path <- file.path(dir_path, "weekly_counts")
  # write.csv() does not create missing directories, so make sure the
  # output directory is there before writing.
  dir.create(count_path, showWarnings = FALSE, recursive = TRUE)
  output_file_path <- file.path(count_path, paste0("weeklycount_", file_name))

  write.csv(weekly_commits, output_file_path, row.names = FALSE)
  weekly_commits
}

# Run transform_commit_data() on every CSV file directly inside `dir_path`.
#
# Arguments:
#   dir_path  Directory to scan (not recursive, so files already written to
#             the `weekly_counts` subdirectory are not picked up again).
transform_directory_of_commit_data <- function(dir_path) {
  # `pattern` is a regular expression, not a glob: anchor on the extension
  # so that only names ending in ".csv" are processed.
  file_list <- list.files(path = dir_path, pattern = "\\.csv$", full.names = TRUE)
  for (filepath in file_list) {
    transform_commit_data(filepath)
  }
}

transform_directory_of_commit_data(test_dir)