# 1
# 0
# mw-lifecycle-analysis/dsl/final_bivariate.R
# 2025-12-02 09:18:50 -08:00
#
# 137 lines
# 3.9 KiB
# R

library(tidyverse)
#library(dsl)
library(dplyr)
# Path of the DSL data export; the CSV is read with its first row as header.
dsl_csv <- "~/dsl/111725_DSL_frame.csv"
dsl_df <- read.csv(file = dsl_csv, header = TRUE)
# Box plots of TTR / 168 for every week index (treated as a category),
# filled by isAuthorWMF, with one facet row per source and a free y scale
# in each row.
ggplot(
  data = dsl_df,
  mapping = aes(
    x = as.factor(week_index),
    y = TTR / 168,
    fill = isAuthorWMF
  )
) +
  geom_boxplot() +
  facet_grid(source ~ ., scales = "free_y") +
  scale_fill_viridis_d() +
  labs(
    title = "TTR by Task Creation Date",
    x = "Weeks from Release",
    y = "Time to Resolution (weeks)"
  ) +
  theme_minimal()
# One summary row per (week_index, source, isAuthorWMF) combination.
#   tasks_made                 rows with a non-missing resolution_outcome
#   count_resolution_outcome   sum of dsl_score
#   author_closer_sum          rows where author_closer is TRUE
#   median_olmo_*_prop_adac    medians of the two olmo proportion columns
#   median_comments_before_resolution   median of n_comments_before
#
# NOTE(review): count_resolution_outcome sums dsl_score, not
# resolution_outcome -- confirm that the name matches the intent.
# NOTE(review): the sum()/median() calls below do not pass na.rm, so a single
# NA in a group makes that group's value NA -- confirm this is wanted.
weekly_summary <- dsl_df |>
  group_by(week_index, source, isAuthorWMF) |>
  summarise(
    tasks_made = sum(!is.na(resolution_outcome)),
    count_resolution_outcome = sum(dsl_score),
    author_closer_sum = sum(author_closer == TRUE),
    median_olmo_EP_prop_adac = median(olmo_EP_prop_adac),
    median_olmo_TSOL_prop_adac = median(olmo_TSOL_prop_adac),
    median_comments_before_resolution = median(n_comments_before),
    # Return an ungrouped table: without this the result stays grouped by
    # week_index and source and summarise() prints a regrouping message.
    .groups = "drop"
  )
# Weekly task counts as dodged bars, filled by isAuthorWMF, one facet row per
# source. Each source gets its own vertical reference lines at fixed week
# offsets; a dashed line at week 0 is drawn in every facet.
#
# The per-source reference lines are described once in a small table and drawn
# by a single layer. Passing the whole summary as layer data (one layer per
# line) would redraw every line once per summary row.
week_markers <- data.frame(
  source = c("c1", "c1", "c1", "c2", "c2", "c3", "c3"),
  week_index = c(-29, -9, -4, -99, -4, -97, -3),
  linetype = c(
    "dotted", "dotted", "3313",
    "dotted", "3313",
    "dotted", "3313"
  )
)
# Drop markers for sources that are not in the summary, so the marker layer
# never adds a facet that the bars themselves do not have.
week_markers <- week_markers[
  week_markers$source %in% weekly_summary$source, ,
  drop = FALSE
]

ggplot(
  weekly_summary,
  aes(
    x = week_index,
    y = tasks_made,
    fill = isAuthorWMF
  )
) +
  facet_grid(source ~ ., scales = "free_y") +
  geom_col(position = position_dodge(width = 0.9), width = 0.8) +
  geom_vline(
    data = week_markers,
    aes(xintercept = week_index, linetype = linetype),
    color = "black", linewidth = 0.5
  ) +
  # Use the linetype strings in the table verbatim (no legend is produced).
  scale_linetype_identity() +
  geom_vline(
    xintercept = 0,
    linetype = "dashed", color = "black", linewidth = 0.5
  ) +
  theme_minimal() +
  scale_fill_viridis_d()
# One summary row per (source, isAuthorWMF) combination.
#   total_sum                  rows with a non-missing resolution_outcome
#   count_resolution_outcome   sum of resolution_outcome over those rows
#   success_prop               count_resolution_outcome / total_sum
#   median_ttr_days            median TTR (NAs removed) divided by 24;
#                              presumably TTR is in hours -- confirm
#   median_comments_before_resolution   median of n_comments_before
outcome_summary <- dsl_df |>
  group_by(source, isAuthorWMF) |>
  summarise(
    total_sum = sum(!is.na(resolution_outcome)),
    # na.rm = TRUE keeps the numerator consistent with total_sum, which
    # already ignores missing outcomes; otherwise one NA in a group would
    # turn both the count and success_prop into NA.
    count_resolution_outcome = sum(resolution_outcome, na.rm = TRUE),
    success_prop = count_resolution_outcome / total_sum,
    median_ttr_days = median(TTR, na.rm = TRUE) / 24,
    median_comments_before_resolution = median(n_comments_before),
    # Return an ungrouped table instead of one still grouped by source.
    .groups = "drop"
  )
library(ggplot2)
library(ggdist)
# Scatter of olmo_EP_prop_adac against week_index with a default smoother,
# coloured by isAuthorWMF, one facet row per source.
ggplot(
  data = dsl_df,
  mapping = aes(x = week_index, y = olmo_EP_prop_adac, color = isAuthorWMF)
) +
  geom_point() +
  geom_smooth() +
  facet_grid(source ~ .) +
  theme_minimal() +
  scale_color_viridis_d()
# Recode priority as a factor with an explicit level order; any value not
# listed here becomes NA.
dsl_df$priority <- factor(
  dsl_df[["priority"]],
  levels = c(
    "Unbreak Now!",
    "High",
    "Medium",
    "Low",
    "Lowest",
    "Needs Triage"
  )
)
# Row counts per priority level, filled by resolution_outcome, one facet
# column per source.
ggplot(
  data = dsl_df,
  mapping = aes(x = priority, fill = resolution_outcome)
) +
  geom_bar() +
  facet_grid(~source) +
  theme_minimal()
#' Sign-preserving power transform.
#'
#' Raises the absolute value of `x` to the power `p` and multiplies the
#' result by the sign of `x`, so negative inputs stay negative.
#'
#' @param x Numeric vector.
#' @param p Exponent applied to `abs(x)`.
#' @return Numeric vector equal to `sign(x) * abs(x)^p`.
signed_power <- function(x, p) {
  magnitude <- abs(x)^p
  sign(x) * magnitude
}
#' Sign-preserving log transform.
#'
#' Applies `log1p()` to the absolute value of `x` and multiplies the result
#' by the sign of `x`; zero maps to zero.
#'
#' @param x Numeric vector.
#' @return Numeric vector equal to `sign(x) * log1p(abs(x))`.
signed_log <- function(x) {
  magnitude <- log1p(abs(x))
  sign(x) * magnitude
}
# Append sign-preserving transforms of the median PC3/PC4 columns:
# sp_* use signed_power() with exponent 0.2, sl_* use signed_log().
# Columns are added in the same order as before.
dsl_df$sp_med_pc3_adac <- signed_power(dsl_df[["median_PC3_adac"]], 0.2)
dsl_df$sp_med_pc4_adac <- signed_power(dsl_df[["median_PC4_adac"]], 0.2)
dsl_df$sl_med_pc4_adac <- signed_log(dsl_df[["median_PC4_adac"]])
dsl_df$sl_med_pc3_adac <- signed_log(dsl_df[["median_PC3_adac"]])
# log1p(TTR / 24) against the signed-log median PC4 score, with point shape
# and colour both mapped to isAuthorWMF, one facet column per source. The
# loess smoother (span 0.5) is added before the points, so the points are
# drawn on top of it.
ggplot(
  data = dsl_df,
  mapping = aes(
    x = sl_med_pc4_adac,
    y = log1p(TTR / 24),
    color = isAuthorWMF,
    shape = isAuthorWMF
  )
) +
  geom_smooth(method = "loess", span = 0.5) +
  geom_point() +
  facet_grid(~source) +
  scale_color_viridis_d() +
  theme_minimal()