1
0
mw-lifecycle-analysis/dsl/rq1_plots.R
2025-12-08 20:21:58 -08:00

168 lines
4.3 KiB
R

library(tidyverse)
#library(dsl)
library(dplyr)
# Load the DSL frame and turn `priority` into a factor whose levels run from
# "Needs Triage" up to "Unbreak Now!".
dsl_csv <- "~/dsl/120725_DSL_frame.csv"
dsl_df <- dsl_csv |>
  read.csv(header = TRUE) |>
  mutate(
    priority = factor(
      priority,
      levels = c(
        "Needs Triage", "Lowest", "Low", "Medium", "High", "Unbreak Now!"
      )
    )
  )
# Author-closer share ----
# For every (week_index, source) pair, count the rows of dsl_df per
# author_closer value and express each count as a proportion of that
# week/source total.
ac_summary <- dsl_df |>
  group_by(week_index, author_closer, source) |>
  summarise(count = n(), .groups = "drop") |>
  group_by(week_index, source) |>
  mutate(proportion = count / sum(count)) |>
  ungroup()

# Points plus a smoothed trend per author_closer value, one facet row per
# source. NOTE(review): the y axis shows proportions (0-1) although the label
# says "%"; confirm which one is intended.
ac_summary |>
  ggplot(
    aes(
      x = week_index,
      y = proportion,
      color = author_closer,
      fill = author_closer,
      group = author_closer
    )
  ) +
  geom_smooth() +
  scale_color_viridis_d(option = "turbo") +
  scale_fill_viridis_d(option = "turbo") +
  facet_grid(source ~ ., scales = "free_y") +
  geom_point() +
  labs(
    title = "Did the Task Author also close the task? by week created",
    x = "Weeks from feature deployment",
    y = "% of work-items",
    # colour and fill map the same variable; giving both guides the same
    # title lets ggplot2 merge them into a single legend instead of two.
    color = "Did the Task Author also Close the Task?",
    fill = "Did the Task Author also Close the Task?"
  ) +
  theme_minimal()
# % of EP ----
# Scatter of olmo_EP_prop_adac against week_index with a smoothed trend,
# coloured by isAuthorWMF and faceted by source.
# NOTE(review): the column name suggests a 0-1 proportion while the axis label
# says "%"; confirm the unit.
ggplot(
  dsl_df,
  aes(
    x = week_index,
    y = olmo_EP_prop_adac,
    color = isAuthorWMF
  )
) +
  facet_grid(source ~ .) +
  geom_point() +
  geom_smooth() +
  scale_color_viridis_d() +
  theme_minimal() +
  labs(
    x = "Weeks from Release",
    # Fixed the missing space in "tagged as'Existent Problems'".
    y = "% of sentences machine-tagged as 'Existent Problems'",
    title = "Proportion of 'Existent Problems' tags over time"
  )
# TTR by affiliation and priority ----
# Keep only the three priorities of interest and rows whose week_index is at
# least -26.
ttr_df <- dsl_df |>
  filter(
    priority %in% c("Needs Triage", "Unbreak Now!", "High"),
    week_index >= -26
  )

# The x axis below is discrete (one box per observed week), so a vertical line
# is positioned by level index, not by week value. A hard-coded 27 only marks
# week 0 when every week from -26 to 0 is present in the data; derive the
# position from the weeks that actually occur instead.
ttr_weeks <- sort(unique(ttr_df$week_index))
release_pos <- match(0, ttr_weeks)
if (is.na(release_pos)) {
  # Week 0 is absent: mark the boundary between the last negative week and
  # the first positive one.
  release_pos <- sum(ttr_weeks < 0) + 0.5
}

# Boxplots of TTR per week, split by priority (colour/fill) and isAuthorWMF
# (linetype), one facet row per source.
# NOTE(review): TTR / 168 presumably converts hours to weeks - confirm the
# unit of TTR. Only the colour scale is set to viridis; fill keeps the
# default palette.
ttr_df |>
  ggplot(
    aes(
      x = as.factor(week_index),
      y = TTR / 168,
      color = priority,
      fill = priority,
      linetype = isAuthorWMF
    )
  ) +
  facet_grid(source ~ .) +
  # Outliers are hidden and the view is clipped to 0-156 on the y axis.
  geom_boxplot(outlier.shape = NA) +
  theme_minimal() +
  coord_cartesian(ylim = c(0, 156)) +
  geom_vline(
    xintercept = release_pos,
    linetype = "dashed",
    color = "black",
    linewidth = 0.5
  ) +
  scale_color_viridis_d() +
  labs(
    x = "Weeks from Release",
    y = "Time to Resolution (weeks)",
    title = "TTR by Task Creation Date and Triage Priority"
  )
# % of tasks declined per week ----
# For every (week_index, source) pair, count the rows of dsl_df per
# task_status and express each count as a proportion of that week/source
# total.
declined_summary <- dsl_df |>
  group_by(week_index, task_status, source) |>
  summarise(count = n(), .groups = "drop") |>
  group_by(week_index, source) |>
  mutate(proportion = count / sum(count)) |>
  ungroup()

# Layer data for the "Opt-out deployment" annotation. The summary holds one
# row per task_status for source "c1" at week 6; using it directly draws the
# same label several times on top of itself. Collapse it to one row so the
# text is drawn once (and not at all if that week/source is absent).
optout_label <- declined_summary |>
  filter(source == "c1", week_index == 6) |>
  distinct(source, week_index)

# Dodged bars of the status proportions per week, one facet row per source,
# with a dashed line at week 0.
task_status_plot <- declined_summary |>
  ggplot(aes(x = week_index, y = proportion, fill = task_status)) +
  facet_grid(
    source ~ .,
    scales = "free_y",
    labeller = labeller(
      source = c(
        "c1" = "VisualEditor",
        "c2" = "HTTPS-login",
        "c3" = "HTTP-deprecation"
      )
    )
  ) +
  geom_col(position = position_dodge(width = 0.9), width = 0.8) +
  scale_fill_viridis_d(option = "magma") +
  labs(
    x = "Weeks from feature deployment",
    y = "% of items in status",
    fill = "Task Status"
  ) +
  geom_vline(
    xintercept = 0,
    linetype = "dashed",
    color = "black",
    linewidth = 0.5
  ) +
  geom_text(
    data = optout_label,
    aes(x = week_index, y = 0.9, label = "Opt-out deployment"),
    # The layer data has no task_status column, so the plot-level fill
    # mapping must not be inherited.
    inherit.aes = FALSE,
    size = 2.5
  ) +
  theme_minimal() +
  theme(legend.position = "top")

# Write the figure to the working directory as a 12 x 6 inch PNG at 600 dpi.
ggsave(
  filename = "120825_tasks_status.png",
  plot = task_status_plot,
  width = 12,
  height = 6,
  dpi = 600
)
# count of gerrit links to operations, extensions, or core ----
# Rows with a non-NA gerrit_repo are bucketed into a focal repository group
# and counted per (week_index, focal_repo, source).
# NOTE(review): read.csv() leaves empty character cells as "" rather than NA;
# if the CSV encodes "no Gerrit link" as an empty cell, those rows pass the
# filter and are counted as "core" - verify against the data.
gerrit_summary <- dsl_df |>
  filter(!is.na(gerrit_repo)) |>
  mutate(
    focal_repo = case_when(
      str_detect(gerrit_repo, "extensions") ~ "extensions",
      str_detect(gerrit_repo, "operations") ~ "operations",
      # integration repos are folded into the operations group
      str_detect(gerrit_repo, "integration") ~ "operations",
      TRUE ~ "core"
    )
  ) |>
  group_by(week_index, focal_repo, source) |>
  summarise(count = n(), .groups = "drop")

# Points plus a smoothed trend of the weekly counts per repository group, one
# facet row per source. The labels previously described the task-status plot
# (title, "% of items in status", "Task Status"); they now describe what is
# actually drawn, and colour and fill share a title so the legends merge.
gerrit_summary |>
  ggplot(
    aes(
      x = week_index,
      y = count,
      color = focal_repo,
      fill = focal_repo,
      group = focal_repo
    )
  ) +
  geom_smooth() +
  scale_color_viridis_d(option = "turbo") +
  scale_fill_viridis_d(option = "turbo") +
  facet_grid(source ~ ., scales = "free_y") +
  geom_point() +
  labs(
    title = "Gerrit links by repository group and week created",
    x = "Weeks from feature deployment",
    y = "Count of Gerrit links",
    color = "Repository group",
    fill = "Repository group"
  ) +
  theme_minimal()