adding in longitudinal_text_analysis
This commit is contained in:
parent
406c50f677
commit
7a433fc364
@ -1,45 +0,0 @@
|
|||||||
library(tidyverse)
|
|
||||||
# test data directory: /gscratch/comdata/users/mjilg/program_testing/
|
|
||||||
# load in the paritioned directories
|
|
||||||
library(dplyr)
|
|
||||||
library(lubridate)
|
|
||||||
|
|
||||||
#for a given file we want to get the count data and produce a csv
|
|
||||||
# Path to a single commit-data CSV in the test data directory.
test_file <- "/gscratch/comdata/users/mjilg/program_testing/core_2012-01-01_to_2014-12-31.csv"
|
|
||||||
# Test data directory; passed to transform_directory_of_commit_data() below.
test_dir <- "/gscratch/comdata/users/mjilg/program_testing/"
|
|
||||||
|
|
||||||
#' Convert one CSV of per-commit rows into weekly commit counts.
#'
#' Reads `filepath`, parses its `commit_date` column with `ymd_hms()`, floors
#' each timestamp to its week, counts rows per (week, project) and writes the
#' result to `<dirname(filepath)>/weekly_counts/weeklycount_<basename(filepath)>`.
#'
#' @param filepath Path to a CSV file containing a `commit_date` column. The
#'   project name is taken from the file name by stripping a trailing
#'   `_YYYY-MM-DD_to_YYYY-MM-DD.csv` suffix.
#' @return The weekly counts, with columns `week`, `project_name` and
#'   `commit_count`. The same table is written to disk as a side effect.
transform_commit_data <- function(filepath){
  df <- read.csv(filepath, header = TRUE)
  dir_path <- dirname(filepath)
  file_name <- basename(filepath)

  # Strip the date-range suffix to recover the project name. The dot before
  # "csv" is escaped so it only matches a literal ".".
  project_name <- sub(
    "_[0-9]{4}-[0-9]{2}-[0-9]{2}_to_[0-9]{4}-[0-9]{2}-[0-9]{2}\\.csv$",
    "",
    file_name
  )

  # transform the rows of commit data to weekly count data
  weekly_commits <- df |>
    mutate(
      commit_date = ymd_hms(commit_date),
      project_name = project_name,
      week = floor_date(commit_date, "week")
    ) |>
    group_by(week, project_name) |>
    summarise(commit_count = n(), .groups = "drop")

  # prepare to save the new, transformed file
  count_path <- file.path(dir_path, "weekly_counts")
  # write.csv() does not create missing directories, so make sure the output
  # directory exists before writing.
  if (!dir.exists(count_path)) {
    dir.create(count_path, recursive = TRUE)
  }
  output_file_path <- file.path(count_path, paste0("weeklycount_", file_name))

  write.csv(weekly_commits, output_file_path, row.names = FALSE)
  weekly_commits
}
|
|
||||||
|
|
||||||
#then for all files in a directory
|
|
||||||
#' Run transform_commit_data() on every CSV file directly inside a directory.
#'
#' @param dir_path Directory to scan (not recursive) for files whose names end
#'   in `.csv`.
#' @return `NULL`, invisibly; called for the files written by
#'   transform_commit_data().
transform_directory_of_commit_data <- function(dir_path) {
  # list.files() interprets `pattern` as a regular expression, not a glob, so
  # anchor on a literal ".csv" extension instead of using "*.csv".
  file_list <- list.files(path = dir_path, pattern = "\\.csv$", full.names = TRUE)
  for (filepath in file_list) {
    transform_commit_data(filepath)
  }
}
|
|
||||||
|
|
||||||
# Script driver: transform every CSV found in the test data directory.
transform_directory_of_commit_data(test_dir)
|
|
15
text_analysis/longitudinal_analysis.R
Normal file
15
text_analysis/longitudinal_analysis.R
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
library(dplyr)
|
||||||
|
library(ggplot2)
|
||||||
|
# Location of the Phabricator comments CSV to analyse.
phab_data_path <- "/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/0205_convo_data/phab_data/visualeditor/0205_ve_phab_comments.csv"

# Load the comments, flag those whose text contains "bots", convert the numeric
# `date_created` (seconds since 1970-01-01) into a UTC timestamp, and keep only
# rows strictly inside the numeric date window.
phab_data <- read.csv(phab_data_path, header = TRUE) |>
  mutate(
    has_ref = grepl("bots", comment_text),
    timestamp = as.POSIXct(date_created, origin = "1970-01-01", tz = "UTC")
  ) |>
  filter(date_created > 1356923678, date_created < 1518232866)

# Scatter of the flag over time, one semi-transparent point per comment.
g <- ggplot(phab_data, aes(x = timestamp, y = has_ref)) +
  geom_point(alpha = 0.5) +
  theme_minimal()
g
|
Loading…
Reference in New Issue
Block a user