diff --git a/.RData b/.RData new file mode 100644 index 0000000..8a3c7be Binary files /dev/null and b/.RData differ diff --git a/commit_activity_analysis/get_weekly_commit_counts.R b/commit_activity_analysis/get_weekly_commit_counts.R deleted file mode 100644 index 3872fa7..0000000 --- a/commit_activity_analysis/get_weekly_commit_counts.R +++ /dev/null @@ -1,45 +0,0 @@ -library(tidyverse) -# test data directory: /gscratch/comdata/users/mjilg/program_testing/ -# load in the paritioned directories -library(dplyr) -library(lubridate) - -#for a given file we want to get the count data and produce a csv -test_file <- "/gscratch/comdata/users/mjilg/program_testing/core_2012-01-01_to_2014-12-31.csv" -test_dir <- "/gscratch/comdata/users/mjilg/program_testing/" - -transform_commit_data <- function(filepath){ - df = read.csv(filepath, header = TRUE) - dir_path = dirname(filepath) - file_name = basename(filepath) - - # transform the rows of commit data to weekly count data - project_name <- sub("_[0-9]{4}-[0-9]{2}-[0-9]{2}_to_[0-9]{4}-[0-9]{2}-[0-9]{2}.csv$", "", file_name) - - df <- df |> - mutate(commit_date = ymd_hms(commit_date)) |> - mutate(project_name = project_name) - - weekly_commits <- df |> - mutate(week = floor_date(commit_date, "week")) |> - group_by(week, project_name) |> - summarise(commit_count = n(), .groups = 'drop') - - #prepare to save the new, transformed file - count_path <- file.path(dir_path, "weekly_counts") - count_file_name <- paste0("weeklycount_", file_name) - output_file_path <- file.path(count_path, count_file_name) - #save and gracefully exit - write.csv(weekly_commits, output_file_path, row.names = FALSE) - return(weekly_commits) -} - -#then for all files in a directory -transform_directory_of_commit_data <- function(dir_path) { - file_list <- list.files(path = dir_path, pattern = "*.csv", full.names = TRUE) - for (filepath in file_list) { - transform_commit_data(filepath) - } -} - -transform_directory_of_commit_data(test_dir) diff --git 
a/text_analysis/longitudinal_analysis.R b/text_analysis/longitudinal_analysis.R
new file mode 100644
index 0000000..7dd944d
--- /dev/null
+++ b/text_analysis/longitudinal_analysis.R
@@ -0,0 +1,25 @@
+# Scatter plot, over time, of whether each row's comment_text contains "bots".
+library(dplyr)
+library(ggplot2)
+
+# Input CSV; the pipeline below reads its comment_text and date_created columns.
+phab_data_path <- "/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/0205_convo_data/phab_data/visualeditor/0205_ve_phab_comments.csv"
+
+# Exclusive lower/upper bounds applied to date_created. The same column is
+# passed to as.POSIXct() with origin 1970-01-01 (UTC) to build `timestamp`.
+window_start <- 1356923678
+window_end <- 1518232866
+
+phab_data <- read.csv(phab_data_path, header = TRUE) |>
+  mutate(
+    has_ref = grepl("bots", comment_text),
+    timestamp = as.POSIXct(date_created, origin = "1970-01-01", tz = "UTC")
+  ) |>
+  filter(date_created > window_start & date_created < window_end)
+
+# One point per remaining row: x = timestamp, y = the TRUE/FALSE match flag.
+g <- ggplot(phab_data, aes(x = timestamp, y = has_ref)) +
+  geom_point(alpha = 0.5) +
+  theme_minimal()
+# Top-level evaluation auto-prints the plot (e.g. in an interactive session).
+g