new script for transforming git data
This commit is contained in:
		
							parent
							
								
									27a9781564
								
							
						
					
					
						commit
						406c50f677
					
				
							
								
								
									
										45
									
								
								commit_activity_analysis/get_weekly_commit_counts.R
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										45
									
								
								commit_activity_analysis/get_weekly_commit_counts.R
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,45 @@ | ||||
| library(tidyverse) | ||||
| # test data directory: /gscratch/comdata/users/mjilg/program_testing/ | ||||
| # load in the partitioned directories | ||||
| library(dplyr) | ||||
| library(lubridate) | ||||
| 
 | ||||
| #for a given file we want to get the count data and produce a csv | ||||
# Sample inputs for manual runs: the directory holding the partitioned commit
# CSVs, and one individual file from it.
test_dir <- "/gscratch/comdata/users/mjilg/program_testing/"
test_file <- "/gscratch/comdata/users/mjilg/program_testing/core_2012-01-01_to_2014-12-31.csv"
| 
 | ||||
#' Convert one CSV of per-commit rows into weekly commit counts.
#'
#' Reads the CSV at `filepath`, derives a project name from the file name,
#' counts the rows falling in each week, and writes the result to
#' `<directory of filepath>/weekly_counts/weeklycount_<file name>`.
#'
#' @param filepath Path to a CSV with a header row and a `commit_date` column
#'   that `ymd_hms()` can parse. The file name is expected to look like
#'   `<project>_<YYYY-MM-DD>_to_<YYYY-MM-DD>.csv`; if it does not, the whole
#'   file name is used as the project name.
#' @return A data frame with the columns `week`, `project_name` and
#'   `commit_count`, one row per week/project combination.
transform_commit_data <- function(filepath) {
  df <- read.csv(filepath, header = TRUE)
  dir_path <- dirname(filepath)
  file_name <- basename(filepath)

  # Strip the "_<start>_to_<end>.csv" suffix to recover the project name.
  # The dot is escaped so that only a literal ".csv" extension matches.
  project_name <- sub(
    "_[0-9]{4}-[0-9]{2}-[0-9]{2}_to_[0-9]{4}-[0-9]{2}-[0-9]{2}\\.csv$",
    "",
    file_name
  )

  # Transform the rows of commit data to weekly count data.
  weekly_commits <- df |>
    mutate(commit_date = ymd_hms(commit_date)) |>
    mutate(project_name = project_name) |>
    mutate(week = floor_date(commit_date, "week")) |>
    group_by(week, project_name) |>
    summarise(commit_count = n(), .groups = "drop")

  # write.csv() does not create missing directories, so make sure the output
  # folder exists before saving (a no-op when it is already there).
  count_path <- file.path(dir_path, "weekly_counts")
  dir.create(count_path, showWarnings = FALSE, recursive = TRUE)

  output_file_path <- file.path(count_path, paste0("weeklycount_", file_name))
  write.csv(weekly_commits, output_file_path, row.names = FALSE)

  weekly_commits
}
| 
 | ||||
| #then for all files in a directory | ||||
#' Run `transform_commit_data()` on every CSV file directly inside a directory.
#'
#' @param dir_path Directory to scan (not recursive). Only files whose names
#'   end in ".csv" are processed.
#' @return `NULL`, invisibly; called for the side effects of
#'   `transform_commit_data()`.
transform_directory_of_commit_data <- function(dir_path) {
  # `pattern` is a regular expression, not a shell glob: anchor on a literal
  # ".csv" extension so names that merely contain "csv" are not picked up.
  file_list <- list.files(
    path = dir_path,
    pattern = "\\.csv$",
    full.names = TRUE
  )
  for (filepath in file_list) {
    transform_commit_data(filepath)
  }
  invisible(NULL)
}
| 
 | ||||
# Script entry point: transform every commit CSV in the test directory.
# list.files() returns nothing for a missing directory, so warn explicitly
# rather than finishing silently without producing any output.
if (dir.exists(test_dir)) {
  transform_directory_of_commit_data(test_dir)
} else {
  warning("Input directory not found: ", test_dir, call. = FALSE)
}
		Loading…
	
		Reference in New Issue
	
	Block a user