# Exploratory summaries and plots for the DSL task-level data frame.
#
# The script reads one CSV into `dsl_df`, builds two summary tables
# (`weekly_summary`, `outcome_summary`), derives signed transforms of two
# principal-component columns, and draws six ggplot figures.

# Dependencies ----

library(tidyverse)
# library(dsl)
library(dplyr)
library(ggplot2)
library(ggdist)

# Data ----

dsl_csv <- "~/dsl/111725_DSL_frame.csv"
dsl_df <- read.csv(dsl_csv, header = TRUE)

# TTR distribution per week ----

# TTR / 168 is labelled as weeks below, so TTR is presumably recorded in
# hours (168 h = 1 week) -- TODO confirm against the data dictionary.
ggplot(
  dsl_df,
  aes(
    x = as.factor(week_index),
    y = TTR / 168,
    fill = isAuthorWMF
  )
) +
  facet_grid(source ~ ., scales = "free_y") +
  geom_boxplot() +
  theme_minimal() +
  scale_fill_viridis_d() +
  labs(
    x = "Weeks from Release",
    y = "Time to Resolution (weeks)",
    title = "TTR by Task Creation Date"
  )

# Weekly summary ----

# One row per (week_index, source, isAuthorWMF). `.groups = "drop"` returns
# an ungrouped table so later verbs do not silently operate per group.
# NOTE(review): `count_resolution_outcome` sums `dsl_score` here but sums
# `resolution_outcome` in `outcome_summary` below -- confirm which is meant.
weekly_summary <- dsl_df |>
  group_by(week_index, source, isAuthorWMF) |>
  summarise(
    tasks_made = sum(!is.na(resolution_outcome)),
    count_resolution_outcome = sum(dsl_score),
    author_closer_sum = sum(author_closer == TRUE),
    median_olmo_EP_prop_adac = median(olmo_EP_prop_adac),
    median_olmo_TSOL_prop_adac = median(olmo_TSOL_prop_adac),
    median_comments_before_resolution = median(n_comments_before),
    .groups = "drop"
  )

# Tasks created per week, with reference lines ----

# Per-source vertical reference lines (week offsets relative to week 0).
# Keeping them in one table draws each line exactly once; the previous
# per-layer `filter()` approach drew one copy per summary row.
# NOTE(review): the meaning of each offset is not recorded in this script.
reference_lines <- data.frame(
  source = c("c1", "c1", "c1", "c2", "c2", "c3", "c3"),
  week_index = c(-29, -9, -4, -99, -4, -97, -3),
  line_style = c(
    "dotted", "dotted", "3313",
    "dotted", "3313",
    "dotted", "3313"
  )
) |>
  # Only keep lines for sources that are present, so no empty facet appears.
  filter(source %in% weekly_summary$source)

ggplot(
  weekly_summary,
  aes(
    x = week_index,
    y = tasks_made,
    fill = isAuthorWMF
  )
) +
  facet_grid(source ~ ., scales = "free_y") +
  geom_col(position = position_dodge(width = 0.9), width = 0.8) +
  geom_vline(
    data = reference_lines,
    aes(xintercept = week_index, linetype = line_style),
    color = "black",
    linewidth = 0.5
  ) +
  # Week 0 is drawn in every facet.
  geom_vline(
    xintercept = 0,
    linetype = "dashed",
    color = "black",
    linewidth = 0.5
  ) +
  # Use the literal linetype strings stored in `line_style`, with no legend.
  scale_linetype_identity() +
  theme_minimal() +
  scale_fill_viridis_d()

# Outcome summary ----

# One row per (source, isAuthorWMF). The numerator drops NA outcomes so it
# matches the denominator, which counts only non-missing outcomes.
outcome_summary <- dsl_df |>
  group_by(source, isAuthorWMF) |>
  summarise(
    total_sum = sum(!is.na(resolution_outcome)),
    count_resolution_outcome = sum(resolution_outcome, na.rm = TRUE),
    success_prop = count_resolution_outcome / total_sum,
    median_ttr_days = median(TTR, na.rm = TRUE) / 24,
    median_comments_before_resolution = median(n_comments_before),
    .groups = "drop"
  )

# 'Existent Problems' proportion over time ----

ggplot(
  dsl_df,
  aes(
    x = week_index,
    y = olmo_EP_prop_adac,
    color = isAuthorWMF
  )
) +
  facet_grid(source ~ .) +
  geom_point() +
  geom_smooth() +
  scale_color_viridis_d() +
  theme_minimal() +
  labs(
    x = "Weeks from Release",
    y = "% of sentences machine-tagged as'Existent Problems'",
    title = "Proportion of 'Existent Problems' tags over time"
  )

# Resolution outcome by priority ----

# Fix the display order of priorities; any value not listed becomes NA.
dsl_df <- dsl_df |>
  mutate(
    priority = factor(
      priority,
      levels = c(
        "Unbreak Now!", "High", "Medium", "Low", "Lowest", "Needs Triage"
      )
    )
  )

ggplot(
  dsl_df,
  aes(
    fill = resolution_outcome,
    x = priority
  )
) +
  facet_grid(~source) +
  geom_bar() +
  theme_minimal()

# Signed transforms ----

#' Sign-preserving power transform.
#'
#' @param x A numeric vector.
#' @param p The exponent applied to `abs(x)`.
#' @return `sign(x) * abs(x)^p`, the same length as `x`.
signed_power <- function(x, p) {
  sign(x) * abs(x)^p
}

#' Sign-preserving log transform.
#'
#' @param x A numeric vector.
#' @return `sign(x) * log1p(abs(x))`, the same length as `x`.
signed_log <- function(x) {
  sign(x) * log1p(abs(x))
}

dsl_df <- dsl_df |>
  mutate(
    sp_med_pc3_adac = signed_power(median_PC3_adac, 0.2),
    sp_med_pc4_adac = signed_power(median_PC4_adac, 0.2),
    sl_med_pc4_adac = signed_log(median_PC4_adac),
    sl_med_pc3_adac = signed_log(median_PC3_adac)
  )

# TTR against signed-log PC4 ----

ggplot(
  dsl_df,
  aes(
    y = log1p(TTR / 24),
    x = sl_med_pc4_adac,
    shape = isAuthorWMF,
    color = isAuthorWMF
  )
) +
  facet_grid(~source) +
  theme_minimal() +
  geom_smooth(method = "loess", span = 0.5) +
  geom_point() +
  scale_color_viridis_d()