# Exploratory bivariate plots for the task-level analysis data.
#
# Reads the bivariate CSV, recodes priority_score values of 90 to NA, and
# draws six plots. Each plot is a top-level expression, so it is rendered
# when the script is run top to bottom.

library(tidyverse)
library(jsonlite)
library(lubridate)
library(ggdist)
library(ggplot2)

# TODO: get the within-case seniority

# Load data ----

main_csv <- "~/analysis_data/100725_bivariate_data.csv"
main_df <- read.csv(main_csv, header = TRUE)

# task_df <- main_df |>
#   filter(comment_type == "task_description")

# Keep the raw priority_score and add a copy in which the value 90 is
# replaced by NA.
main_df <- main_df |>
  mutate(new_priority_score = ifelse(priority_score == 90, NA, priority_score))

# Plot 1: n_comments by week_index ----

# Labels describe the columns that are actually mapped (n_comments on y,
# faceting by source only).
# xlim() sets the x scale limits; rows outside [-130, 15] are dropped from
# the plot.
ggplot(main_df, aes(x = week_index, y = n_comments, fill = isAuthorWMF)) +
  facet_grid(~source) +
  geom_dots(side = "both", layout = "hex", stackratio = 0.92) +
  scale_fill_viridis_d() +
  xlim(-130, 15) +
  theme_minimal() +
  labs(
    title = "n_comments by week_index (faceted by source)",
    x = "week_index",
    y = "n_comments",
    fill = "isAuthorWMF?"
  )

# Plot 2: recoded priority score by resolution outcome ----

# Uses new_priority_score, i.e. the column with 90 recoded to NA.
ggplot(
  main_df,
  aes(y = new_priority_score, x = resolution_outcome, fill = isAuthorWMF)
) +
  facet_grid(source ~ phase) +
  stat_histinterval() +
  theme_minimal() +
  labs(
    title = "Histogram of triaged priority scores by task outcome and affiliation (faceted by source and phase)",
    x = "on-time resolution (wide release date +90 days)",
    y = "priority score (post-triage)",
    fill = "isTaskAuthorWMF?"
  )

# Plot 3: task counts by resolution outcome and affiliation ----

# count() yields one row per phase/source/outcome/affiliation combination
# with the tally in column n, which drives both the tile fill and the label.
main_df |>
  count(phase, source, resolution_outcome, isAuthorWMF) |>
  ggplot(aes(
    y = resolution_outcome,
    x = isAuthorWMF,
    fill = n,
    label = n
  )) +
  facet_grid(source ~ phase) +
  geom_tile() +
  geom_text(size = 5, color = "white") +
  scale_fill_viridis_c() +
  theme_minimal() +
  labs(
    title = "Count of Tasks by on-time resolution",
    x = "isTaskAuthorWMF?",
    y = "on-time resolution (wide release date +90 days)",
    fill = "count of tasks"
  )

# Plot 4: quantile dot plot of week_index by resolution outcome ----

ggplot(
  main_df,
  aes(y = resolution_outcome, x = week_index, fill = isAuthorWMF)
) +
  facet_grid(~source) +
  stat_dots(position = "dodgejust", quantiles = 100, color = NA) +
  theme_minimal() +
  labs(
    title = "centile (1/100) distribution dot plot of tasks (faceted by source)",
    x = "week_index of task filed",
    y = "on-time resolution (wide release date +90 days) ",
    fill = "isTaskAuthorWMF?"
  )

# Plot 5: priority_score over week_index ----

# NOTE(review): this plots the raw priority_score (values of 90 included),
# whereas plot 2 uses new_priority_score. Confirm which column is intended
# for an axis labelled "triaged priority score".
ggplot(
  main_df,
  aes(y = priority_score, x = week_index, color = resolution_outcome)
) +
  facet_grid(~source) +
  geom_point() +
  geom_smooth() +
  theme_minimal() +
  labs(
    title = "week_index x priority_score(faceted by source)",
    x = "week_index of task filed",
    y = "triaged priority score",
    color = "on-time resolution (wide release date +90 days)"
  )

# Plot 6: priority_score against median gerrit reviewers ----

# NOTE(review): as in plot 5, the raw priority_score is used here rather
# than new_priority_score. Confirm the intended column.
ggplot(
  main_df,
  aes(y = priority_score, x = median_gerrit_reviewers, color = isAuthorWMF)
) +
  facet_grid(~source) +
  geom_point() +
  theme_minimal() +
  labs(
    title = "gerrit reviewers x priority_score(faceted by source)",
    x = "median # of gerrit reviewers for linked PRs",
    y = "triaged priority score",
    color = "isAuthorWMF?"
  )