# Exploratory plots for the DSL task data frame.
#
# Reads the per-item CSV, then draws (and in one case saves) weekly summaries:
# author/closer overlap, 'Existent Problems' tag share, time to resolution by
# priority, task status shares, and Gerrit link counts by repository group.

library(tidyverse)
# library(dsl)
library(dplyr)

# Load data ----

dsl_csv <- "~/dsl/120725_DSL_frame.csv"
dsl_df <- read.csv(dsl_csv, header = TRUE)

# Reversed so that "Needs Triage" is the first factor level and
# "Unbreak Now!" the last.
priority_levels <- rev(c(
  "Unbreak Now!", "High", "Medium", "Low", "Lowest", "Needs Triage"
))

dsl_df <- dsl_df |>
  mutate(priority = factor(priority, levels = priority_levels))

# Share of items whose author also closed them, per week ----

# Row counts per (week, author_closer, source), converted to a share of all
# rows in the same (week, source) cell.
ac_summary <- dsl_df |>
  count(week_index, author_closer, source, name = "count") |>
  group_by(week_index, source) |>
  mutate(proportion = count / sum(count)) |>
  ungroup()

# Colour and fill carry the same variable; giving both the same legend title
# lets ggplot2 merge them into a single legend.
author_closer_legend <- "Did the Task Author also Close the Task?"

ac_summary |>
  ggplot(
    aes(
      x = week_index,
      y = proportion,
      color = author_closer,
      fill = author_closer,
      group = author_closer
    )
  ) +
  geom_smooth() +
  scale_color_viridis_d(option = "turbo") +
  scale_fill_viridis_d(option = "turbo") +
  facet_grid(source ~ ., scales = "free_y") +
  geom_point() +
  labs(
    title = "Did the Task Author also close the task? by week created",
    x = "Weeks from feature deployment",
    y = "% of work-items",
    color = author_closer_legend,
    fill = author_closer_legend
  ) +
  theme_minimal()

# Share of sentences tagged 'Existent Problems' over time ----

ggplot(
  dsl_df,
  aes(x = week_index, y = olmo_EP_prop_adac, color = isAuthorWMF)
) +
  facet_grid(source ~ .) +
  geom_point() +
  geom_smooth() +
  scale_color_viridis_d() +
  theme_minimal() +
  labs(
    x = "Weeks from Release",
    y = "% of sentences machine-tagged as 'Existent Problems'",
    title = "Proportion of 'Existent Problems' tags over time"
  )

# Time to resolution by affiliation and priority ----

dsl_df |>
  filter(
    priority %in% c("Needs Triage", "Unbreak Now!", "High"),
    week_index >= -26
  ) |>
  ggplot(
    aes(
      x = as.factor(week_index),
      # The y-axis is labelled in weeks; 168 = 24 * 7, so this presumably
      # converts TTR from hours -- TODO confirm the unit of TTR.
      y = TTR / 168,
      color = priority,
      fill = priority,
      linetype = isAuthorWMF
    )
  ) +
  facet_grid(source ~ .) +
  geom_boxplot(outlier.shape = NA) +
  theme_minimal() +
  # Zoom without dropping observations from the boxplot statistics.
  coord_cartesian(ylim = c(0, 156)) +
  # NOTE(review): x is discrete here, so 27 is the 27th factor level. That is
  # week 0 only if every week from -26 to 0 is present in the filtered data.
  geom_vline(
    xintercept = 27,
    linetype = "dashed",
    color = "black",
    linewidth = 0.5
  ) +
  scale_color_viridis_d() +
  labs(
    x = "Weeks from Release",
    y = "Time to Resolution (weeks)",
    title = "TTR by Task Creation Date and Triage Priority"
  )

# Share of items in each task status, per week ----

declined_summary <- dsl_df |>
  count(week_index, task_status, source, name = "count") |>
  group_by(week_index, source) |>
  mutate(proportion = count / sum(count)) |>
  ungroup()

# One row only for the annotation. Using the raw subset would keep one row per
# task_status and draw the same label several times on top of itself.
opt_out_label <- declined_summary |>
  filter(source == "c1", week_index == 6) |>
  distinct(source, week_index)

source_labels <- c(
  "c1" = "VisualEditor",
  "c2" = "HTTPS-login",
  "c3" = "HTTP-deprecation"
)

task_status_plot <- declined_summary |>
  ggplot(aes(x = week_index, y = proportion, fill = task_status)) +
  facet_grid(
    source ~ .,
    scales = "free_y",
    labeller = labeller(source = source_labels)
  ) +
  geom_col(position = position_dodge(width = 0.9), width = 0.8) +
  scale_fill_viridis_d(option = "magma") +
  labs(
    title = "Task Status (as of February 28, 2025) by Week",
    x = "Weeks from feature deployment",
    y = "% of items in status",
    fill = "Task Status"
  ) +
  geom_vline(
    xintercept = 0,
    linetype = "dashed",
    color = "black",
    linewidth = 0.5
  ) +
  # inherit.aes = FALSE: the annotation data has no task_status column, so the
  # plot-level fill mapping must not be inherited by this layer.
  geom_text(
    data = opt_out_label,
    aes(x = week_index, y = 0.9, label = "Opt-out deployment"),
    size = 2.5,
    inherit.aes = FALSE
  ) +
  theme_minimal() +
  theme(legend.position = "top")

ggsave(
  filename = "120825_tasks_status.png",
  plot = task_status_plot,
  width = 12, # inches
  height = 4, # inches
  dpi = 600 # high resolution
)

# Count of Gerrit links to operations, extensions, or core ----

# Rows with a Gerrit repo are bucketed by substring match; "integration" repos
# are counted as "operations", and anything unmatched falls through to "core".
gerrit_summary <- dsl_df |>
  filter(!is.na(gerrit_repo)) |>
  mutate(
    focal_repo = case_when(
      str_detect(gerrit_repo, "extensions") ~ "extensions",
      str_detect(gerrit_repo, "operations|integration") ~ "operations",
      TRUE ~ "core"
    )
  ) |>
  count(week_index, focal_repo, source, name = "count")

gerrit_summary |>
  ggplot(
    aes(
      x = week_index,
      y = count,
      color = focal_repo,
      fill = focal_repo,
      group = focal_repo
    )
  ) +
  geom_smooth() +
  scale_color_viridis_d(option = "turbo") +
  scale_fill_viridis_d(option = "turbo") +
  facet_grid(source ~ ., scales = "free_y") +
  geom_point() +
  # Labels describe what is plotted (counts per repository group); the earlier
  # labels were carried over from the task-status plot.
  labs(
    title = "Gerrit links by repository group, by week created",
    x = "Weeks from feature deployment",
    y = "Count of Gerrit links",
    color = "Repository group",
    fill = "Repository group"
  ) +
  theme_minimal()