1
0
govdoc-cr-analysis/cleaning_scripts/get_weekly_commit_counts.R

46 lines
1.6 KiB
R
Raw Normal View History

2025-01-30 04:24:43 +00:00
library(tidyverse)
# test data directory: /gscratch/comdata/users/mjilg/program_testing/
# load in the partitioned directories
library(dplyr)
library(lubridate)
#for a given file we want to get the count data and produce a csv
# Path to a single sample commit CSV. Not referenced anywhere else in the
# visible script; presumably kept for ad-hoc testing of transform_commit_data().
test_file <- "/gscratch/comdata/users/mjilg/program_testing/core_2012-01-01_to_2014-12-31.csv"
# Directory that is passed to transform_directory_of_commit_data() at the
# bottom of the script.
test_dir <- "/gscratch/comdata/users/mjilg/program_testing/"
# Convert one CSV of per-commit rows into weekly commit counts and save them.
#
# The input file name is expected to look like
# `<project>_<YYYY-MM-DD>_to_<YYYY-MM-DD>.csv`; the `<project>` prefix is
# attached to every output row as `project_name`. The input must contain a
# `commit_date` column that lubridate's `ymd_hms()` can parse.
#
# @param filepath Path to the commit-level CSV file.
# @return A data frame with one row per (week, project_name) pair and a
#   `commit_count` column. The same table is also written to
#   `<directory of filepath>/weekly_counts/weeklycount_<file name>`.
transform_commit_data <- function(filepath) {
  df <- read.csv(filepath, header = TRUE)
  dir_path <- dirname(filepath)
  file_name <- basename(filepath)

  # Strip the "_<start>_to_<end>.csv" suffix to recover the project name.
  # The dot is escaped so only a literal ".csv" extension is removed.
  project_name <- sub(
    "_[0-9]{4}-[0-9]{2}-[0-9]{2}_to_[0-9]{4}-[0-9]{2}-[0-9]{2}\\.csv$",
    "",
    file_name
  )

  # Transform the rows of commit data to weekly count data.
  df <- df |>
    mutate(commit_date = ymd_hms(commit_date)) |>
    mutate(project_name = project_name)
  weekly_commits <- df |>
    mutate(week = floor_date(commit_date, "week")) |>
    group_by(week, project_name) |>
    summarise(commit_count = n(), .groups = "drop")

  # Prepare the output location. The directory is created on first use
  # because write.csv() fails when the target directory does not exist.
  count_path <- file.path(dir_path, "weekly_counts")
  dir.create(count_path, showWarnings = FALSE, recursive = TRUE)
  count_file_name <- paste0("weeklycount_", file_name)
  output_file_path <- file.path(count_path, count_file_name)

  # Save, then hand the counts back to the caller.
  write.csv(weekly_commits, output_file_path, row.names = FALSE)
  weekly_commits
}
#then for all files in a directory
# Run transform_commit_data() on every CSV file directly inside a directory.
#
# The scan is not recursive, so only files at the top level of `dir_path`
# are processed.
#
# @param dir_path Directory to scan for `.csv` files.
# @return NULL, invisibly; the function is called for its side effects.
transform_directory_of_commit_data <- function(dir_path) {
  # list.files() takes a regular expression, not a glob. The previous
  # "*.csv" also matched names such as "notes.csv.bak" or "xcsv.dat";
  # anchoring on a literal ".csv" extension selects CSV files only.
  file_list <- list.files(
    path = dir_path,
    pattern = "\\.csv$",
    full.names = TRUE
  )
  for (filepath in file_list) {
    transform_commit_data(filepath)
  }
  invisible(NULL)
}
# Script entry point: process every commit CSV found in the test directory.
transform_directory_of_commit_data(test_dir)