# Exploratory summaries and plots for the DSL task data frame.
#
# Reads the task-level CSV, derives weekly / per-source summaries, and draws
# a series of ggplot2 figures faceted by `source` (c1, c2, c3). Three of the
# figures are also written to PNG with ggsave().

library(tidyverse)
# library(dsl)
library(dplyr)
library(ggplot2)
library(ggdist)

# Shared constants ----

# Priority tags in display order (most to least urgent, untriaged last).
priority_levels <- c(
  "Unbreak Now!", "High", "Medium", "Low", "Lowest", "Needs Triage"
)

# The subset of priority tags shown in the triage-focused plots.
triage_focus <- c("Needs Triage", "Unbreak Now!", "High")

# Human-readable facet strip labels for each `source` code.
source_labels <- c(
  "c1" = "VisualEditor",
  "c2" = "HTTPS-login",
  "c3" = "HTTP-deprecation"
)

# Load data ----

dsl_csv <- "~/dsl/120725_DSL_frame.csv"
dsl_df <- read.csv(dsl_csv, header = TRUE)

# Reversed level order for the first set of plots; the factor is re-levelled
# in forward order further down, before the resolution-outcome bar chart.
dsl_df <- dsl_df |>
  mutate(priority = factor(priority, levels = rev(priority_levels)))

# Triage priority proportions by week ----

# Count tasks per (week, priority, source), then express each count as a
# share of all tasks in the same (week, source).
data_summary <- dsl_df |>
  group_by(week_index, priority, source) |>
  summarise(count = n(), .groups = "drop") |>
  group_by(week_index, source) |>
  mutate(proportion = count / sum(count)) |>
  ungroup()

priority_plot_summary <- data_summary |>
  filter(priority %in% triage_focus)

# One row per label so each is drawn exactly once, in the named facet only,
# regardless of how many summary rows exist for that week.
priority_plot_labels <- tribble(
  ~source, ~week_index, ~y,  ~label,
  "c1",    6,           0.6, "Opt-out deployment"
)

# NOTE(review): `proportion` is on a 0-1 scale while the axis title says "%".
priority_plot <- priority_plot_summary |>
  ggplot(aes(x = week_index, y = proportion, fill = priority)) +
  facet_grid(
    source ~ .,
    scales = "free_y",
    labeller = labeller(source = source_labels)
  ) +
  geom_col(position = position_dodge(width = 0.9), width = 0.8) +
  scale_color_viridis_d(option = "turbo") +
  scale_fill_viridis_d(option = "turbo") +
  geom_vline(
    xintercept = 0,
    linetype = "dashed",
    color = "black",
    linewidth = 0.5
  ) +
  geom_text(
    data = priority_plot_labels,
    aes(x = week_index, y = y, label = label),
    inherit.aes = FALSE,
    size = 2.5
  ) +
  labs(
    title = "Triage priority proportions for new tasks by week created",
    x = "Weeks from feature deployment",
    y = "% of items tagged",
    fill = "Priority Tag"
  ) +
  theme_minimal() +
  theme(legend.position = "top")

priority_plot

ggsave(
  filename = "120825_triage_priority.png",
  plot = priority_plot,
  width = 12, # inches
  height = 4, # inches
  dpi = 600 # high resolution
)

# Same proportions as smoothed trends (displayed only, not saved).
data_summary |>
  filter(priority %in% triage_focus) |>
  ggplot(
    aes(
      x = week_index,
      y = proportion,
      color = priority,
      fill = priority,
      group = priority
    )
  ) +
  geom_smooth() +
  scale_color_viridis_d(option = "turbo") +
  scale_fill_viridis_d(option = "turbo") +
  facet_grid(source ~ ., scales = "free_y") +
  geom_point() +
  labs(
    title = "Proportions of Triage Priority by Week",
    x = "Weeks from feature deployment",
    y = "% of items tagged",
    color = "Priority Tag"
  ) +
  theme_minimal()

# VisualEditor time to resolution ----

# TTR is divided by 168 and labelled in weeks (presumably TTR is recorded in
# hours; confirm against the data dictionary).
c1_ttr_plot <- dsl_df |>
  filter(
    priority %in% triage_focus,
    week_index >= -26,
    source == "c1"
  ) |>
  ggplot(aes(x = as.factor(week_index), y = TTR / 168, fill = priority)) +
  facet_grid(
    source ~ .,
    scales = "free_y",
    labeller = labeller(source = source_labels)
  ) +
  geom_boxplot(outlier.shape = NA) +
  theme_minimal() +
  # Zoom without dropping observations from the boxplot statistics.
  coord_cartesian(ylim = c(0, 52)) +
  # x is discrete here, so 27 is a position along the factor levels rather
  # than a week_index value.
  geom_vline(
    xintercept = 27,
    linetype = "dashed",
    color = "black",
    linewidth = 0.5
  ) +
  scale_fill_viridis_d(option = "turbo") +
  # annotate() draws the label once; a geom_text() layer with constant
  # aesthetics would redraw it for every row of the task data.
  annotate(
    "text",
    x = 25,
    y = 45,
    label = "Opt-out deployment",
    size = 4
  ) +
  labs(
    x = "Weeks from Release",
    y = "Time to Resolution (weeks)",
    fill = "Priority Tag",
    title = "VisualEditor Time to Resolution by Triage Priority"
  ) +
  theme(legend.position = "top")

c1_ttr_plot

ggsave(
  filename = "120825_c1_ttr.png",
  plot = c1_ttr_plot,
  width = 12, # inches
  height = 4, # inches
  dpi = 600 # high resolution
)

# Machine-tag proportions by author affiliation ----

# Stack the three olmo_*_prop columns into (tag, proportion) pairs and give
# each tag a readable name.
dsl_df_long <- dsl_df |>
  pivot_longer(
    cols = c(olmo_EP_prop, olmo_RK_prop, olmo_TSOL_prop),
    names_to = "tag",
    values_to = "proportion"
  ) |>
  mutate(
    tag = gsub("olmo_|_prop", "", tag),
    tag = case_when(
      tag == "EP" ~ "Existent Problem",
      tag == "RK" ~ "Record Keeping",
      tag == "TSOL" ~ "Solutions"
    )
  )

ggplot(dsl_df_long, aes(x = tag, y = proportion, fill = isAuthorWMF)) +
  facet_grid(source ~ .) +
  geom_boxplot() +
  theme_minimal() +
  scale_fill_viridis_d() +
  labs(
    x = "Tag",
    y = "% of sentences tagged",
    title = paste0(
      "Proportion of machine tags of sentence focus, ",
      "by comment author affiliation"
    ),
    color = "Is Author WMF",
    fill = "Is Author WMF"
  )

# Tasks created per week ----

# NOTE(review): `count_resolution_outcome` sums `dsl_score` here, whereas
# `outcome_summary` below sums `resolution_outcome` under the same name;
# confirm which column is intended.
weekly_summary <- dsl_df |>
  group_by(week_index, source, isAuthorWMF) |>
  summarise(
    tasks_made = sum(!is.na(resolution_outcome)),
    count_resolution_outcome = sum(dsl_score),
    author_closer_sum = sum(author_closer == TRUE),
    median_olmo_EP_prop_adac = median(olmo_EP_prop_adac),
    median_olmo_TSOL_prop_adac = median(olmo_TSOL_prop_adac),
    median_olmo_RK_prop_adac = median(olmo_RK_prop_adac),
    median_comments_before_resolution = median(n_comments_before),
    .groups = "drop"
  )

# Per-source vertical markers: one row per line, drawn only in that source's
# facet. `linetype` is used verbatim via scale_linetype_identity().
task_milestones <- tribble(
  ~source, ~week_index, ~linetype,
  "c1",    -29,         "dotted",
  "c1",    -9,          "dotted",
  "c1",    -4,          "3313",
  "c2",    -99,         "dotted",
  "c2",    -4,          "3313",
  "c3",    -97,         "dotted",
  "c3",    -3,          "3313"
)

# Per-source text labels: one row per label, drawn once in the named facet.
task_labels <- tribble(
  ~source, ~week_index, ~y,  ~label,
  "c1",    6,           120, "Opt-out deployment",
  "c1",    -33,         120, "Opt-in Testing",
  "c2",    -12,         20,  "Deployment Announcement"
)

tasks_created <- ggplot(
  weekly_summary,
  aes(x = week_index, y = tasks_made, fill = isAuthorWMF)
) +
  facet_grid(
    source ~ .,
    scales = "free_y",
    labeller = labeller(source = source_labels)
  ) +
  geom_col(position = position_dodge(width = 0.9), width = 0.8) +
  geom_vline(
    data = task_milestones,
    aes(xintercept = week_index, linetype = linetype),
    color = "black",
    linewidth = 0.5
  ) +
  scale_linetype_identity() +
  # Week 0 marker, shared by every facet.
  geom_vline(
    xintercept = 0,
    linetype = "dashed",
    color = "black",
    linewidth = 0.5
  ) +
  geom_text(
    data = task_labels,
    aes(x = week_index, y = y, label = label),
    inherit.aes = FALSE,
    size = 2.5
  ) +
  theme_minimal() +
  scale_fill_viridis_d() +
  labs(
    x = "Weeks from Feature Deployment",
    y = "Count of Tasks Created",
    title = "Phabricator Tasks Created by Week and Author Affiliation",
    fill = "Task Author Affiliated with WMF?"
  ) +
  theme(legend.position = "top")

tasks_created

ggsave(
  filename = "120825_tasks_created.png",
  plot = tasks_created,
  width = 12, # inches
  height = 4, # inches
  dpi = 600 # high resolution
)

# Resolution outcomes by source and affiliation ----

# `total_sum` counts non-missing outcomes, so the numerator must also skip
# missing values; otherwise a single NA turns the count and the success
# proportion into NA for the whole group.
outcome_summary <- dsl_df |>
  group_by(source, isAuthorWMF) |>
  summarise(
    total_sum = sum(!is.na(resolution_outcome)),
    count_resolution_outcome = sum(resolution_outcome, na.rm = TRUE),
    success_prop = count_resolution_outcome / total_sum,
    median_ttr_days = median(TTR, na.rm = TRUE) / 24,
    median_comments_before_resolution = median(n_comments_before),
    .groups = "drop"
  )

# 'Existent Problems' tag proportion over time ----

ggplot(
  dsl_df,
  aes(x = week_index, y = olmo_EP_prop_adac, color = isAuthorWMF)
) +
  facet_grid(source ~ .) +
  geom_point() +
  geom_smooth() +
  scale_color_viridis_d() +
  theme_minimal() +
  labs(
    x = "Weeks from Release",
    y = "% of sentences machine-tagged as 'Existent Problems'",
    title = "Proportion of 'Existent Problems' tags over time"
  )

# Resolution outcome by priority ----

# Re-level priority in forward order for the bar chart below.
dsl_df <- dsl_df |>
  mutate(priority = factor(priority, levels = priority_levels))

ggplot(dsl_df, aes(fill = resolution_outcome, x = priority)) +
  facet_grid(~source) +
  geom_bar() +
  theme_minimal()

# Sign-preserving transforms ----

#' Sign-preserving power transform.
#'
#' @param x A numeric vector.
#' @param p Exponent applied to the absolute value of `x`.
#' @return `sign(x) * abs(x)^p`, the same length as `x`.
signed_power <- function(x, p) {
  sign(x) * abs(x)^p
}

#' Sign-preserving log transform.
#'
#' @param x A numeric vector.
#' @return `sign(x) * log1p(abs(x))`, the same length as `x`.
signed_log <- function(x) {
  sign(x) * log1p(abs(x))
}

dsl_df <- dsl_df |>
  mutate(
    sp_med_pc3_adac = signed_power(median_PC3_adac, 0.2),
    sp_med_pc4_adac = signed_power(median_PC4_adac, 0.2),
    sl_med_pc4_adac = signed_log(median_PC4_adac),
    sl_med_pc3_adac = signed_log(median_PC3_adac)
  )

# log1p(TTR / 24) against the signed-log PC4 median, per source.
ggplot(
  dsl_df,
  aes(
    y = log1p(TTR / 24),
    x = sl_med_pc4_adac,
    shape = isAuthorWMF,
    color = isAuthorWMF
  )
) +
  facet_grid(~source) +
  theme_minimal() +
  geom_smooth(method = "loess", span = 0.5) +
  geom_point() +
  scale_color_viridis_d()