adding in longitudinal_text_analysis
This commit is contained in:
		
							parent
							
								
									406c50f677
								
							
						
					
					
						commit
						7a433fc364
					
				| @ -1,45 +0,0 @@ | ||||
| library(tidyverse) | ||||
| # test data directory: /gscratch/comdata/users/mjilg/program_testing/ | ||||
| # load in the partitioned directories | ||||
| library(dplyr) | ||||
| library(lubridate) | ||||
| 
 | ||||
| # For a given file we want to get the count data and produce a CSV; these are the test input file and the test directory | ||||
| test_file <- "/gscratch/comdata/users/mjilg/program_testing/core_2012-01-01_to_2014-12-31.csv" | ||||
| test_dir <- "/gscratch/comdata/users/mjilg/program_testing/" | ||||
| 
 | ||||
| #' Convert one CSV of per-commit rows into weekly commit counts. | ||||
| #' | ||||
| #' Reads `filepath`, parses `commit_date` with `ymd_hms()`, tags every row with | ||||
| #' the project name recovered from the file name, counts rows per | ||||
| #' (week, project_name) and writes the result to | ||||
| #' `<directory of filepath>/weekly_counts/weeklycount_<file name>`. | ||||
| #' | ||||
| #' @param filepath Path to a CSV with a header row and a `commit_date` column. | ||||
| #' @return The weekly counts, with columns `week`, `project_name` and | ||||
| #'   `commit_count`. | ||||
| transform_commit_data <- function(filepath) { | ||||
|   df <- read.csv(filepath, header = TRUE) | ||||
|   dir_path <- dirname(filepath) | ||||
|   file_name <- basename(filepath) | ||||
|    | ||||
|   # strip the "_YYYY-MM-DD_to_YYYY-MM-DD.csv" suffix to recover the project name; | ||||
|   # the dot is escaped so that only a literal ".csv" extension matches | ||||
|   project_name <- sub( | ||||
|     "_[0-9]{4}-[0-9]{2}-[0-9]{2}_to_[0-9]{4}-[0-9]{2}-[0-9]{2}\\.csv$", | ||||
|     "", | ||||
|     file_name | ||||
|   ) | ||||
|    | ||||
|   df <- df |> | ||||
|     mutate(commit_date = ymd_hms(commit_date)) |> | ||||
|     mutate(project_name = project_name) | ||||
|    | ||||
|   # transform the rows of commit data to weekly count data | ||||
|   weekly_commits <- df |> | ||||
|     mutate(week = floor_date(commit_date, "week")) |> | ||||
|     group_by(week, project_name) |> | ||||
|     summarise(commit_count = n(), .groups = "drop") | ||||
|    | ||||
|   # prepare to save the new, transformed file | ||||
|   count_path <- file.path(dir_path, "weekly_counts") | ||||
|   # write.csv() does not create missing directories, so make sure it exists | ||||
|   if (!dir.exists(count_path)) { | ||||
|     dir.create(count_path, recursive = TRUE) | ||||
|   } | ||||
|   count_file_name <- paste0("weeklycount_", file_name) | ||||
|   output_file_path <- file.path(count_path, count_file_name) | ||||
|    | ||||
|   # save, then hand the counts back to the caller | ||||
|   write.csv(weekly_commits, output_file_path, row.names = FALSE) | ||||
|   weekly_commits | ||||
| } | ||||
| 
 | ||||
| #then for all files in a directory | ||||
| #' Run `transform_commit_data()` on every CSV file directly inside `dir_path`. | ||||
| #' | ||||
| #' @param dir_path Directory to scan for `.csv` files (not recursive). | ||||
| #' @return `NULL`, invisibly (the value of the `for` loop); called for the | ||||
| #'   files that `transform_commit_data()` writes. | ||||
| transform_directory_of_commit_data <- function(dir_path) { | ||||
|   # list.files() takes a regular expression, not a glob: anchor on a literal | ||||
|   # ".csv" extension so names that merely contain "csv" are not picked up | ||||
|   file_list <- list.files(path = dir_path, pattern = "\\.csv$", full.names = TRUE) | ||||
|   for (filepath in file_list) { | ||||
|     transform_commit_data(filepath) | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| # run the transformation over the test directory | ||||
| transform_directory_of_commit_data(test_dir) | ||||
							
								
								
									
										15
									
								
								text_analysis/longitudinal_analysis.R
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										15
									
								
								text_analysis/longitudinal_analysis.R
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,15 @@ | ||||
| library(dplyr) | ||||
| library(ggplot2) | ||||
| # Read the Phabricator comment export, keep rows whose `date_created` (epoch | ||||
| # seconds) lies strictly inside the window (roughly 2012-12-31 to 2018-02-10 | ||||
| # UTC), flag comments whose text contains "bots", and add a UTC timestamp. | ||||
| phab_data_path <- "/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/0205_convo_data/phab_data/visualeditor/0205_ve_phab_comments.csv" | ||||
| phab_data <- phab_data_path |> | ||||
|   read.csv(header = TRUE) |> | ||||
|   filter(date_created > 1356923678, date_created < 1518232866) |> | ||||
|   mutate( | ||||
|     has_ref = grepl("bots", comment_text), | ||||
|     timestamp = as.POSIXct(date_created, origin = "1970-01-01", tz = "UTC") | ||||
|   ) | ||||
| 
 | ||||
| # Scatter of the "bots" flag over time; semi-transparent points show overlap. | ||||
| g <- ggplot(data = phab_data, mapping = aes(x = timestamp, y = has_ref)) + | ||||
|   geom_point(alpha = 0.5) + | ||||
|   theme_minimal() | ||||
| g | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user