287 lines
9.0 KiB
R
287 lines
9.0 KiB
R
library(tidyverse)
|
|
#library(dsl)
|
|
library(dplyr)
|
|
# Location of the exported DSL task frame.
dsl_csv <- "~/dsl/120725_DSL_frame.csv"

# Read the frame, then store `priority` as a factor. The levels run from
# "Needs Triage" up to "Unbreak Now!"; any value outside this set becomes NA.
dsl_df <- read.csv(dsl_csv, header = TRUE)
dsl_df$priority <- factor(
  dsl_df$priority,
  levels = c("Needs Triage", "Lowest", "Low", "Medium", "High", "Unbreak Now!")
)
|
|
|
|
# Task counts per (week, priority, source), plus each priority's share of the
# tasks counted for that week within that source.
data_summary <- dsl_df |>
  count(week_index, priority, source, name = "count") |>
  group_by(week_index, source) |>
  mutate(proportion = count / sum(count)) |>
  ungroup()
|
|
|
|
library(ggdist)
|
|
# Keep only the three priority tags shown in the priority figure.
priority_plot_summary <- data_summary |>
  filter(priority %in% c("Needs Triage", "Unbreak Now!", "High"))
|
|
# Dodged bar chart of weekly priority proportions, one facet row per source.
# A dashed line marks week 0 in every panel; the "Opt-out deployment" label is
# placed in the c1 panel at week 6.
# NOTE(review): the y axis is labelled "%" but `proportion` is a 0-1 fraction;
# confirm whether the axis should be formatted as a percentage.
priority_plot <- priority_plot_summary |>
  ggplot(aes(x = week_index, y = proportion, fill = priority)) +
  facet_grid(
    source ~ .,
    scales = "free_y",
    labeller = labeller(source = c(
      "c1" = "VisualEditor",
      "c2" = "HTTPS-login",
      "c3" = "HTTP-deprecation"
    ))
  ) +
  geom_col(position = position_dodge(width = 0.9), width = 0.8) +
  scale_fill_viridis_d(option = "turbo") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.5) +
  geom_text(
    # The summary holds one row per priority for this week; collapse them to a
    # single row so the label is drawn once instead of being overplotted.
    data = priority_plot_summary |>
      filter(source == "c1", week_index == 6) |>
      distinct(source, week_index),
    aes(x = week_index, y = 0.6, label = "Opt-out deployment"),
    # The annotation frame has no `priority` column, so the plot-level `fill`
    # mapping must not be inherited.
    inherit.aes = FALSE,
    size = 2.5
  ) +
  labs(
    title = "Triage priority proportions for new tasks by week created",
    x = "Weeks from feature deployment",
    y = "% of items tagged",
    fill = "Priority Tag"
  ) +
  theme_minimal() +
  theme(legend.position = "top")
|
|
# Show the figure, then write it to disk as a 12 x 4 inch PNG at 600 dpi.
priority_plot

ggsave(
  "120825_triage_priority.png",
  priority_plot,
  width = 12,
  height = 4,
  units = "in",
  dpi = 600
)
|
|
|
|
# Exploratory view: smoothed weekly proportion for the three plotted priority
# tags, one facet row per source, with the raw weekly points overlaid.
data_summary |>
  filter(priority %in% c("Needs Triage", "Unbreak Now!", "High")) |>
  ggplot(aes(
    x = week_index,
    y = proportion,
    color = priority,
    fill = priority,
    group = priority
  )) +
  geom_smooth() +
  scale_color_viridis_d(option = "turbo") +
  scale_fill_viridis_d(option = "turbo") +
  facet_grid(source ~ ., scales = "free_y") +
  geom_point() +
  labs(
    title = "Proportions of Triage Priority by Week",
    x = "Weeks from feature deployment",
    y = "% of items tagged",
    # Both aesthetics map to `priority`; giving them the same title lets
    # ggplot2 merge them into one legend instead of drawing two.
    color = "Priority Tag",
    fill = "Priority Tag"
  ) +
  theme_minimal()
|
|
|
|
# Box plots of time to resolution per week for source c1, from week -26
# onward, split by the three plotted priority tags.
# TTR is divided by 168 and labelled in weeks, so it is presumably recorded in
# hours -- TODO confirm.
c1_ttr_plot <- local({
  c1_ttr_data <- dsl_df |>
    filter(
      priority %in% c("Needs Triage", "Unbreak Now!", "High"),
      week_index >= -26,
      source == "c1"
    )

  # The x axis is discrete, so a vertical line is positioned by level index,
  # not by week number. as.factor() orders the weeks numerically, so week 0
  # sits at its rank among the weeks present. This equals the previously
  # hard-coded 27 when every week from -26 is present and stays correct when
  # some weeks have no rows.
  deployment_pos <- match(0, sort(unique(c1_ttr_data$week_index)))

  ggplot(
    c1_ttr_data,
    aes(x = as.factor(week_index), y = TTR / 168, fill = priority)
  ) +
    facet_grid(
      source ~ .,
      scales = "free_y",
      labeller = labeller(source = c(
        "c1" = "VisualEditor",
        "c2" = "HTTPS-login",
        "c3" = "HTTP-deprecation"
      ))
    ) +
    geom_boxplot(outlier.shape = NA) +
    theme_minimal() +
    coord_cartesian(ylim = c(0, 52)) +
    geom_vline(
      xintercept = deployment_pos,
      linetype = "dashed", color = "black", linewidth = 0.5
    ) +
    scale_fill_viridis_d(option = "turbo") +
    # annotate() draws the label once; a geom_text() layer without its own
    # data would redraw it for every row of the plotted data.
    annotate(
      "text",
      x = deployment_pos - 2, y = 45,
      label = "Opt-out deployment",
      size = 4
    ) +
    labs(
      x = "Weeks from Release",
      y = "Time to Resolution (weeks)",
      fill = "Priority Tag"
    ) +
    theme(legend.position = "top")
})
|
|
# Show the figure, then write it to disk as a 12 x 6 inch PNG at 600 dpi.
c1_ttr_plot

ggsave(
  "120825_c1_ttr.png",
  c1_ttr_plot,
  width = 12,
  height = 6,
  units = "in",
  dpi = 600
)
|
|
|
|
# Reshape the three olmo_*_prop columns to long form: one row per task and
# tag, with the column name reduced to its code and then spelled out.
dsl_df_long <- pivot_longer(
  dsl_df,
  cols = c(olmo_EP_prop, olmo_RK_prop, olmo_TSOL_prop),
  names_to = "tag",
  values_to = "proportion"
) |>
  # Strip the "olmo_" prefix and "_prop" suffix, leaving EP / RK / TSOL.
  mutate(tag = gsub("olmo_|_prop", "", tag)) |>
  mutate(
    tag = case_when(
      tag == "EP" ~ "Existent Problem",
      tag == "RK" ~ "Record Keeping",
      tag == "TSOL" ~ "Solutions"
    )
  )
|
|
|
|
# Box plots of the per-tag proportions, filled by isAuthorWMF, with one facet
# row per source.
dsl_df_long |>
  ggplot(aes(x = tag, y = proportion, fill = isAuthorWMF)) +
  facet_grid(source ~ .) +
  geom_boxplot() +
  theme_minimal() +
  scale_fill_viridis_d() +
  labs(
    title = "Proportion of machine tags of sentence focus, by comment author affiliation",
    x = "Tag",
    y = "% of sentences tagged",
    color = "Is Author WMF",
    fill = "Is Author WMF"
  )
|
|
|
|
|
|
# Per-(week, source, isAuthorWMF) summary of task counts and medians.
# Every aggregate skips missing values so a single NA does not turn a whole
# group's statistic into NA, and the result is returned ungrouped.
weekly_summary <- dsl_df |>
  group_by(week_index, source, isAuthorWMF) |>
  summarise(
    # Counts rows whose resolution_outcome is present.
    tasks_made = sum(!is.na(resolution_outcome)),
    # NOTE(review): this sums dsl_score although the name refers to
    # resolution_outcome -- confirm which column is intended.
    count_resolution_outcome = sum(dsl_score, na.rm = TRUE),
    author_closer_sum = sum(author_closer == TRUE, na.rm = TRUE),
    median_olmo_EP_prop_adac = median(olmo_EP_prop_adac, na.rm = TRUE),
    median_olmo_TSOL_prop_adac = median(olmo_TSOL_prop_adac, na.rm = TRUE),
    median_olmo_RK_prop_adac = median(olmo_RK_prop_adac, na.rm = TRUE),
    median_comments_before_resolution = median(n_comments_before, na.rm = TRUE),
    .groups = "drop"
  )
|
|
|
|
# Dodged bar chart of tasks created per week, filled by isAuthorWMF, with one
# facet row per source. Per-source milestone lines and text labels come from
# small annotation tables, so each line and label is drawn exactly once.
tasks_created <- local({
  # Milestone weeks per source; `line_style` is used verbatim as the linetype.
  milestones <- data.frame(
    source = c("c1", "c1", "c1", "c2", "c2", "c3", "c3"),
    week_index = c(-29, -9, -4, -99, -4, -97, -3),
    line_style = c("dotted", "dotted", "3313", "dotted", "3313", "dotted", "3313")
  )

  # Text labels, positioned in data units inside the named source's panel.
  milestone_labels <- data.frame(
    source = c("c1", "c1", "c2"),
    week_index = c(6, -33, -12),
    y = c(120, 120, 20),
    label = c("Opt-out deployment", "Opt-in Testing", "Deployment Announcement")
  )

  # Drop annotations for sources missing from the data so they cannot create
  # an otherwise empty facet panel.
  milestones <- milestones[milestones$source %in% weekly_summary$source, ]
  milestone_labels <-
    milestone_labels[milestone_labels$source %in% weekly_summary$source, ]

  ggplot(
    weekly_summary,
    aes(x = week_index, y = tasks_made, fill = isAuthorWMF)
  ) +
    facet_grid(
      source ~ .,
      scales = "free_y",
      labeller = labeller(source = c(
        "c1" = "VisualEditor",
        "c2" = "HTTPS-login",
        "c3" = "HTTP-deprecation"
      ))
    ) +
    geom_col(position = position_dodge(width = 0.9), width = 0.8) +
    geom_vline(
      data = milestones,
      aes(xintercept = week_index, linetype = line_style),
      color = "black", linewidth = 0.5
    ) +
    # Use the linetype strings as-is; the identity scale draws no legend.
    scale_linetype_identity() +
    geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.5) +
    geom_text(
      data = milestone_labels,
      aes(x = week_index, y = y, label = label),
      # The label table has no isAuthorWMF column, so the plot-level mapping
      # must not be inherited.
      inherit.aes = FALSE,
      size = 2.5
    ) +
    theme_minimal() +
    scale_fill_viridis_d() +
    labs(
      x = "Weeks from Feature Deployment",
      y = "Count of Tasks Created",
      fill = "Task Author Affiliated with WMF?"
    ) +
    theme(legend.position = "top")
})
|
|
# Show the figure, then write it to disk as a 12 x 6 inch PNG at 600 dpi.
tasks_created

ggsave(
  "120825_tasks_created.png",
  tasks_created,
  width = 12,
  height = 6,
  units = "in",
  dpi = 600
)
|
|
|
|
# Per-(source, isAuthorWMF) outcome summary, returned ungrouped.
outcome_summary <- dsl_df |>
  group_by(source, isAuthorWMF) |>
  summarise(
    # Denominator: rows with a recorded resolution_outcome.
    total_sum = sum(!is.na(resolution_outcome)),
    # Numerator must skip the same missing rows the denominator skips;
    # otherwise one NA outcome makes success_prop NA for the whole group.
    count_resolution_outcome = sum(resolution_outcome, na.rm = TRUE),
    success_prop = count_resolution_outcome / total_sum,
    # TTR is divided by 24 to give days, so it is presumably in hours --
    # TODO confirm.
    median_ttr_days = median(TTR, na.rm = TRUE) / 24,
    median_comments_before_resolution = median(n_comments_before, na.rm = TRUE),
    .groups = "drop"
  )
|
|
|
|
|
|
library(ggplot2)
|
|
library(ggdist)
|
|
|
|
|
|
# Scatter plot with a smoothed trend of olmo_EP_prop_adac against week,
# coloured by isAuthorWMF, with one facet row per source.
ggplot(
  dsl_df,
  aes(x = week_index, y = olmo_EP_prop_adac, color = isAuthorWMF)
) +
  facet_grid(source ~ .) +
  geom_point() +
  geom_smooth() +
  scale_color_viridis_d() +
  theme_minimal() +
  labs(
    x = "Weeks from Release",
    # Adds the space that was missing between "as" and the quoted tag name.
    y = "% of sentences machine-tagged as 'Existent Problems'",
    title = "Proportion of 'Existent Problems' tags over time"
  )
|
|
|
|
# Re-level `priority` so the levels run from "Unbreak Now!" to "Needs Triage"
# for the plots that follow.
dsl_df$priority <- factor(
  dsl_df$priority,
  levels = c("Unbreak Now!", "High", "Medium", "Low", "Lowest", "Needs Triage")
)
|
|
|
|
# Task counts per priority tag, filled by resolution_outcome, with one facet
# column per source.
dsl_df |>
  ggplot(aes(x = priority, fill = resolution_outcome)) +
  facet_grid(~source) +
  geom_bar() +
  theme_minimal()
|
|
|
|
|
|
# Sign-preserving power transform: raises the magnitude of `x` to the power
# `p` and restores the original sign. Vectorised over `x`.
signed_power <- function(x, p) {
  magnitude <- abs(x)^p
  sign(x) * magnitude
}
|
|
|
|
# Sign-preserving log transform: log1p() of the magnitude of `x`, with the
# original sign restored. Vectorised over `x`; zero maps to zero.
signed_log <- function(x) {
  sign(x) * log1p(abs(x))
}
|
|
# Add sign-preserving transforms of the median PC3 / PC4 columns:
# sp_* uses signed_power() with exponent 0.2, sl_* uses signed_log().
dsl_df$sp_med_pc3_adac <- signed_power(dsl_df$median_PC3_adac, 0.2)
dsl_df$sp_med_pc4_adac <- signed_power(dsl_df$median_PC4_adac, 0.2)
dsl_df$sl_med_pc4_adac <- signed_log(dsl_df$median_PC4_adac)
dsl_df$sl_med_pc3_adac <- signed_log(dsl_df$median_PC3_adac)
|
|
|
|
|
|
# log1p(TTR / 24) against the signed-log median PC4 score, with a loess trend
# (span 0.5) and the raw points, coloured and shaped by isAuthorWMF; one facet
# column per source.
dsl_df |>
  ggplot(aes(
    x = sl_med_pc4_adac,
    y = log1p(TTR / 24),
    color = isAuthorWMF,
    shape = isAuthorWMF
  )) +
  facet_grid(~source) +
  theme_minimal() +
  geom_smooth(method = "loess", span = 0.5) +
  geom_point() +
  scale_color_viridis_d()
|