library(tidyverse)
# test data directory: /gscratch/comdata/users/mjilg/program_testing/
# load in the partitioned directories
library(dplyr)
library(lubridate)

# Paths used for testing: a single commit CSV and the directory holding it.
test_file <- "/gscratch/comdata/users/mjilg/program_testing/core_2012-01-01_to_2014-12-31.csv"
test_dir <- "/gscratch/comdata/users/mjilg/program_testing/"

# Convert one CSV of per-commit rows into weekly commit counts.
#
# Reads `filepath` (must contain a `commit_date` column parseable by
# `ymd_hms()`), counts the rows per week and project, writes the result to
# `<dir of filepath>/weekly_counts/weeklycount_<file name>` and returns it.
#
# filepath: path to a CSV named `<project>_<YYYY-MM-DD>_to_<YYYY-MM-DD>.csv`.
# Returns: a tibble with columns `week`, `project_name`, `commit_count`.
transform_commit_data <- function(filepath) {
  df <- read.csv(filepath, header = TRUE)
  dir_path <- dirname(filepath)
  file_name <- basename(filepath)

  # The project name is the file name with the trailing date range removed.
  # The dot is escaped so only a literal ".csv" suffix is stripped.
  project_name <- sub(
    "_[0-9]{4}-[0-9]{2}-[0-9]{2}_to_[0-9]{4}-[0-9]{2}-[0-9]{2}\\.csv$",
    "",
    file_name
  )

  # Transform the rows of commit data to weekly count data.
  weekly_commits <- df |>
    mutate(
      commit_date = ymd_hms(commit_date),
      project_name = project_name
    ) |>
    mutate(week = floor_date(commit_date, "week")) |>
    group_by(week, project_name) |>
    summarise(commit_count = n(), .groups = "drop")

  # Prepare to save the new, transformed file. The output directory is
  # created on demand; write.csv() fails if its parent directory is missing.
  count_path <- file.path(dir_path, "weekly_counts")
  dir.create(count_path, showWarnings = FALSE, recursive = TRUE)
  count_file_name <- paste0("weeklycount_", file_name)
  output_file_path <- file.path(count_path, count_file_name)

  write.csv(weekly_commits, output_file_path, row.names = FALSE)
  weekly_commits
}

# Run transform_commit_data() on every CSV file directly inside `dir_path`.
#
# dir_path: directory to scan (not recursive, so the generated
#   `weekly_counts/` subdirectory is never re-read as input).
# Called for its side effect of writing the weekly count files.
transform_directory_of_commit_data <- function(dir_path) {
  # `pattern` is a regular expression, not a glob: anchor on a literal
  # ".csv" suffix so names such as "notes_csv.txt" are not picked up.
  file_list <- list.files(
    path = dir_path,
    pattern = "\\.csv$",
    full.names = TRUE
  )
  for (filepath in file_list) {
    transform_commit_data(filepath)
  }
}

transform_directory_of_commit_data(test_dir)