1
0
mw-lifecycle-analysis/dsl/scratch.R
2026-01-06 21:37:24 -08:00

62 lines
1.8 KiB
R

library(tidyverse)
# Location of the DSL frame export (a CSV file with a header row).
dsl_csv <- "~/dsl/121625_DSL_frame.csv"

# Load the export into a base data.frame.
dsl_df <- read.csv(file = dsl_csv, header = TRUE)
# Weekly TTR trajectory: one row per (week_index, isTriaged, source) holding
# the number of rows in the group and the mean / standard deviation of TTR
# expressed in weeks.
ttr_trajectory <- dsl_df |>
  mutate(
    # 168 = 24 * 7, i.e. hours per week.
    ttr_weeks = TTR_hours / 168,
    # A missing priority yields NA here: if_else() propagates NA conditions.
    isTriaged = if_else(
      priority == "Needs Triage",
      "Not Triaged",
      "Triaged"
    )
  ) |>
  group_by(week_index, isTriaged, source) |>
  summarise(
    count = n(),
    mean_ttr = mean(ttr_weeks, na.rm = TRUE),
    # sd() is NA for groups with fewer than two non-missing values.
    sd_ttr = sd(ttr_weeks, na.rm = TRUE),
    # Return an ungrouped tibble; without this the result stays grouped by
    # week_index and isTriaged and summarise() prints a regrouping message.
    .groups = "drop"
  )
# Faceted weekly view of ttr_trajectory: mean TTR (solid line) with a
# +/- 1 SD ribbon and the per-week row count (dashed line), one panel per
# source x triage-status combination, each with its own y scale.
# NOTE(review): the count line shares the y axis with mean TTR, while the
# axis label only mentions TTR -- confirm this is intended.
ggplot(ttr_trajectory, aes(x = week_index)) +
  # Line for mean TTR (`linewidth` replaces `size` for lines, which is
  # deprecated since ggplot2 3.4.0)
  geom_line(aes(y = mean_ttr, color = "Mean TTR"), linewidth = 1) +
  # Ribbon for standard deviation
  geom_ribbon(
    aes(ymin = mean_ttr - sd_ttr, ymax = mean_ttr + sd_ttr),
    fill = "lightblue",
    alpha = 0.4
  ) +
  # Line for count of tasks
  geom_line(
    aes(y = count, color = "Count of New Tasks"),
    linewidth = 1,
    linetype = "dashed"
  ) +
  # Facet the plot by source and triaged status
  facet_wrap(source ~ isTriaged, scales = "free_y") +
  labs(
    title = "TTR by Source and Triage Status (TODO)",
    x = "Week Index",
    y = "Mean TTR (in weeks)",
    color = "Metrics"
  ) +
  # Map the two legend labels used in aes(color = ...) to fixed colours.
  scale_color_manual(
    values = c("Mean TTR" = "blue", "Count of New Tasks" = "red")
  ) +
  theme_minimal() +
  theme(
    strip.text = element_text(face = "bold", size = 12),
    axis.text = element_text(size = 10),
    axis.title = element_text(size = 12)
  )
# Mean and standard deviation of TTR (in weeks) per isAuthorWMF value, limited
# to rows from source "c1" whose priority is "Needs Triage" and whose
# week_index is at least 4. The result is printed, not stored.
dsl_df |>
  filter(
    week_index >= 4,
    source == "c1",
    priority == "Needs Triage"
  ) |>
  group_by(isAuthorWMF) |>
  summarise(
    # 168 hours per week; the conversion is done inline on TTR_hours.
    mean_ttr = mean(TTR_hours / 168, na.rm = TRUE),
    sd_ttr = sd(TTR_hours / 168, na.rm = TRUE)
  )
# Row counts for every source x isAuthorWMF x priority combination.
triage <- dsl_df |>
  group_by(source, isAuthorWMF, priority) |>
  summarise(
    count = n(),
    # Return an ungrouped tibble; without this the result stays grouped by
    # source and isAuthorWMF and summarise() prints a regrouping message.
    .groups = "drop"
  )