From c010e9f9cf25b943e4f4e9e46ad3728126f35bbb Mon Sep 17 00:00:00 2001
From: Matthew Gaughan
Date: Sun, 7 Dec 2025 13:32:35 -0800
Subject: [PATCH] updating rq1 plots

---
Reviewer notes (text between the `---` line and the first diff is not part
of the commit message):
- dsl/rq1_plots.R was amended in review, so the blob ids on the `index`
  lines below are those of the original submission and no longer match.
- mgaughan-rstudio-server_31713063.out is a job log that contained an
  RStudio Server password in plain text. The password is redacted here;
  the file should be dropped from the commit (and ignored via .gitignore)
  and the credential rotated.

 dsl/rq1_plots.R                      | 190 +++++++++++++++++++++++++++
 mgaughan-rstudio-server_31713063.out |  17 +++
 2 files changed, 207 insertions(+)
 create mode 100644 dsl/rq1_plots.R
 create mode 100644 mgaughan-rstudio-server_31713063.out

diff --git a/dsl/rq1_plots.R b/dsl/rq1_plots.R
new file mode 100644
index 0000000..ca80b31
--- /dev/null
+++ b/dsl/rq1_plots.R
@@ -0,0 +1,190 @@
+library(tidyverse)
+#library(dsl)
+library(dplyr)
+
+dsl_csv <- "~/dsl/120725_DSL_frame.csv"
+dsl_df <- read.csv(dsl_csv, header = TRUE)
+
+# Levels run from "Needs Triage" up to "Unbreak Now!"; any priority value
+# not listed here becomes NA.
+dsl_df <- dsl_df |>
+  mutate(
+    priority = factor(
+      priority,
+      levels = rev(c(
+        "Unbreak Now!", "High", "Medium", "Low", "Lowest", "Needs Triage"
+      ))
+    )
+  )
+
+# Share of rows in each level of `category` within every (week_index, source)
+# cell. Returns one row per week_index x category x source, with the row
+# `count` and its `proportion` (0-1) of that week/source total.
+weekly_proportions <- function(df, category) {
+  df |>
+    count(week_index, {{ category }}, source, name = "count") |>
+    group_by(week_index, source) |>
+    mutate(proportion = count / sum(count)) |>
+    ungroup()
+}
+
+# n authors who also close their own
+ac_summary <- weekly_proportions(dsl_df, author_closer)
+
+# color and fill share one legend title so ggplot2 merges them into a single
+# legend; titling only `color` splits them in two.
+ac_legend <- "Did the Task Author also Close the Task?"
+ac_summary |>
+  ggplot(aes(
+    x = week_index,
+    y = proportion,
+    color = author_closer,
+    fill = author_closer,
+    group = author_closer
+  )) +
+  geom_smooth() +
+  geom_point() +
+  scale_color_viridis_d(option = "turbo") +
+  scale_fill_viridis_d(option = "turbo") +
+  # proportion is 0-1; format the axis so it matches the "%" label
+  scale_y_continuous(labels = scales::label_percent()) +
+  facet_grid(source ~ ., scales = "free_y") +
+  labs(
+    title = "Did the Task Author also close the task? by week created",
+    x = "Weeks from feature deployment",
+    y = "% of work-items",
+    color = ac_legend,
+    fill = ac_legend
+  ) +
+  theme_minimal()
+
+# % of EP
+ggplot(
+  dsl_df,
+  aes(
+    x = week_index,
+    y = olmo_EP_prop_adac,
+    color = isAuthorWMF
+  )
+) +
+  facet_grid(source ~ .) +
+  geom_point() +
+  geom_smooth() +
+  scale_color_viridis_d() +
+  theme_minimal() +
+  labs(
+    x = "Weeks from Release",
+    y = "% of sentences machine-tagged as 'Existent Problems'",
+    title = "Proportion of 'Existent Problems' tags over time"
+  )
+
+# TTR by affiliation and priority
+# TTR is divided by 168 and labelled in weeks, so it is presumably recorded
+# in hours -- TODO confirm.
+hours_per_week <- 168
+
+ttr_df <- dsl_df |>
+  filter(
+    priority %in% c("Needs Triage", "Unbreak Now!", "High"),
+    week_index >= -26
+  ) |>
+  mutate(week_factor = factor(week_index))
+
+# Mark week 0 on the discrete axis. Looking the position up (rather than
+# hard-coding 27) keeps the line on week 0 even if a week in the window has
+# no rows; the marker is skipped when week 0 itself is absent.
+release_pos <- match("0", levels(ttr_df$week_factor))
+release_marker <- if (is.na(release_pos)) {
+  NULL
+} else {
+  geom_vline(
+    xintercept = release_pos,
+    linetype = "dashed",
+    color = "black",
+    linewidth = 0.5
+  )
+}
+
+ttr_df |>
+  ggplot(aes(
+    x = week_factor,
+    y = TTR / hours_per_week,
+    color = priority,
+    fill = priority,
+    linetype = isAuthorWMF
+  )) +
+  facet_grid(source ~ .) +
+  geom_boxplot(outlier.shape = NA) +
+  release_marker +
+  coord_cartesian(ylim = c(0, 156)) +
+  scale_color_viridis_d() +
+  theme_minimal() +
+  labs(
+    x = "Weeks from Release",
+    y = "Time to Resolution (weeks)",
+    title = "TTR by Task Creation Date and Triage Priority"
+  )
+
+# % of tasks declined per week
+declined_summary <- weekly_proportions(dsl_df, task_status)
+
+declined_summary |>
+  ggplot(aes(
+    x = week_index,
+    y = proportion,
+    color = task_status,
+    fill = task_status,
+    group = task_status
+  )) +
+  geom_smooth() +
+  geom_point() +
+  scale_color_viridis_d(option = "turbo") +
+  scale_fill_viridis_d(option = "turbo") +
+  scale_y_continuous(labels = scales::label_percent()) +
+  facet_grid(source ~ ., scales = "free_y") +
+  labs(
+    title = "Task status for new tasks by week created",
+    x = "Weeks from feature deployment",
+    y = "% of items in status",
+    color = "Task Status",
+    fill = "Task Status"
+  ) +
+  theme_minimal()
+
+# count of gerrit links to operations, extensions, or core
+gerrit_summary <- dsl_df |>
+  # read.csv() keeps blank character cells as "" rather than NA, so drop both;
+  # otherwise rows without a repo would fall through to "core".
+  filter(!is.na(gerrit_repo), gerrit_repo != "") |>
+  mutate(
+    focal_repo = case_when(
+      str_detect(gerrit_repo, "extensions") ~ "extensions",
+      str_detect(gerrit_repo, "operations") ~ "operations",
+      # NOTE(review): "integration" is folded into "operations" -- confirm.
+      str_detect(gerrit_repo, "integration") ~ "operations",
+      TRUE ~ "core"
+    )
+  ) |>
+  count(week_index, focal_repo, source, name = "count")
+
+gerrit_summary |>
+  ggplot(aes(
+    x = week_index,
+    y = count,
+    color = focal_repo,
+    fill = focal_repo,
+    group = focal_repo
+  )) +
+  geom_smooth() +
+  geom_point() +
+  scale_color_viridis_d(option = "turbo") +
+  scale_fill_viridis_d(option = "turbo") +
+  facet_grid(source ~ ., scales = "free_y") +
+  labs(
+    title = "Gerrit links by focal repository and week created",
+    x = "Weeks from feature deployment",
+    y = "Count of Gerrit links",
+    color = "Focal repository",
+    fill = "Focal repository"
+  ) +
+  theme_minimal()
diff --git a/mgaughan-rstudio-server_31713063.out b/mgaughan-rstudio-server_31713063.out
new file mode 100644
index 0000000..cc3b071
--- /dev/null
+++ b/mgaughan-rstudio-server_31713063.out
@@ -0,0 +1,17 @@
+1. SSH tunnel from your workstation using the following command:
+
+   ssh -N -L 8787:n3439:48979 mjilg@klone.hyak.uw.edu
+
+   and point your web browser to http://localhost:8787
+
+2. log in to RStudio Server using the following credentials:
+
+   user: mjilg
+   password: [REDACTED]
+
+When done using RStudio Server, terminate the job by:
+
+1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window)
+2. Issue the following command on the login node:
+
+   scancel -f 31713063