# mw-lifecycle-analysis/analysis_data/100725_bivariate_plots.R
# (repository viewer header: 95 lines, 2.8 KiB, R)

library(tidyverse)
library(jsonlite)
library(lubridate)
## TODO: get the within-case seniority
# ---- Load data ----
# Location of the input CSV; read with base read.csv into main_df.
main_csv <- "~/analysis_data/100725_bivariate_data.csv"
main_df <- read.csv(file = main_csv, header = TRUE)

# Disabled filter kept from the original script:
# task_df <- main_df |>
#   filter(comment_type == "task_description")

# Add new_priority_score: a copy of priority_score in which the value 90 is
# replaced by NA; every other value is carried over unchanged.
main_df <- mutate(
  main_df,
  new_priority_score = ifelse(priority_score == 90, NA, priority_score)
)
# ---- n_comments over week_index, one panel per source ----
# ggdist supplies geom_dots().
library(ggdist)
ggplot(main_df, aes(x = week_index, y = n_comments, fill = isAuthorWMF)) +
  facet_grid(~source) +
  geom_dots(side = "both", layout = "hex", stackratio = 0.92) +
  scale_fill_viridis_d() +
  # Limit the x axis to week_index values between -130 and 15.
  xlim(-130, 15) +
  theme_minimal() +
  # Labels describe what is actually mapped: y is n_comments (the earlier
  # label read "Priority_Score") and panels are split by source only (the
  # earlier title also mentioned phase).
  labs(
    title = "Comment counts by week_index (faceted by source)",
    x = "week_index",
    y = "n_comments",
    fill = "isAuthorWMF?"
  )
library(ggplot2)
# ---- new_priority_score by resolution_outcome ----
# Histogram-interval summaries (stat_histinterval) of new_priority_score for
# each resolution_outcome, filled by isAuthorWMF, laid out as a
# source-by-phase grid.
main_df |>
  ggplot(
    aes(
      x = resolution_outcome,
      y = new_priority_score,
      fill = isAuthorWMF
    )
  ) +
  stat_histinterval() +
  facet_grid(source ~ phase) +
  labs(
    title = "Histogram of triaged priority scores by task outcome and affiliation (faceted by source and phase)",
    x = "on-time resolution (wide release date +90 days)",
    y = "priority score (post-triage)",
    fill = "isTaskAuthorWMF?"
  ) +
  theme_minimal()
# ---- Task counts heatmap ----
# Tally rows for every phase / source / resolution_outcome / isAuthorWMF
# combination (count() stores the tally in column n), then draw one tile per
# combination with the count printed on top in white.
ggplot(
  count(main_df, phase, source, resolution_outcome, isAuthorWMF),
  aes(x = isAuthorWMF, y = resolution_outcome, fill = n, label = n)
) +
  geom_tile() +
  geom_text(color = "white", size = 5) +
  facet_grid(source ~ phase) +
  scale_fill_viridis_c() +
  labs(
    title = "Count of Tasks by on-time resolution",
    x = "isTaskAuthorWMF?",
    y = "on-time resolution (wide release date +90 days)",
    fill = "count of tasks"
  ) +
  theme_minimal()
# ---- Quantile dot plot of week_index by resolution_outcome ----
# stat_dots() is asked for 100 quantiles, drawn without outlines
# (color = NA) and dodged by the isAuthorWMF fill; one panel per source.
main_df |>
  ggplot(
    aes(x = week_index, y = resolution_outcome, fill = isAuthorWMF)
  ) +
  stat_dots(quantiles = 100, position = "dodgejust", color = NA) +
  facet_grid(~source) +
  labs(
    title = "centile (1/100) distribution dot plot of tasks (faceted by source)",
    x = "week_index of task filed",
    y = "on-time resolution (wide release date +90 days) ",
    fill = "isTaskAuthorWMF?"
  ) +
  theme_minimal()
# ---- Priority score over week_index, one panel per source ----
# Scatter plus a smoothed trend (geom_smooth), coloured by resolution_outcome.
#
# The y axis is labelled "triaged priority score", so this plots
# new_priority_score (priority_score with the value 90 set to NA earlier in
# this script) instead of the raw priority_score. Otherwise the 90 rows are
# drawn and fed into the smoother as if they were triaged scores.
# NOTE(review): 90 is presumably the "needs triage" sentinel -- confirm.
ggplot(
  main_df,
  aes(x = week_index, y = new_priority_score, color = resolution_outcome)
) +
  facet_grid(~source) +
  geom_point() +
  geom_smooth() +
  theme_minimal() +
  labs(
    title = "week_index x priority_score(faceted by source)",
    x = "week_index of task filed",
    y = "triaged priority score",
    color = "on-time resolution (wide release date +90 days)"
  )
# ---- Priority score against median_gerrit_reviewers, one panel per source ----
# Scatter coloured by isAuthorWMF.
#
# The y axis is labelled "triaged priority score", so this plots
# new_priority_score (priority_score with the value 90 set to NA earlier in
# this script) instead of the raw priority_score, keeping the 90 rows out of
# the "triaged" axis.
# NOTE(review): 90 is presumably the "needs triage" sentinel -- confirm.
ggplot(
  main_df,
  aes(x = median_gerrit_reviewers, y = new_priority_score, color = isAuthorWMF)
) +
  facet_grid(~source) +
  geom_point() +
  theme_minimal() +
  labs(
    title = "gerrit reviewers x priority_score(faceted by source)",
    x = "median # of gerrit reviewers for linked PRs",
    y = "triaged priority score",
    color = "isAuthorWMF?"
  )