1
0

updating rq1 plots

This commit is contained in:
Matthew Gaughan 2025-12-07 13:32:35 -08:00
parent 108b8aacd6
commit c010e9f9cf
2 changed files with 171 additions and 0 deletions

154
dsl/rq1_plots.R Normal file
View File

@ -0,0 +1,154 @@
library(tidyverse)
#library(dsl)
library(dplyr)
# Load the DSL task frame and turn `priority` into a factor.
# The levels are spelled out in their final order, which is the reverse of
# "Unbreak Now!" > "High" > "Medium" > "Low" > "Lowest" > "Needs Triage".
# Any priority value not listed here becomes NA.
dsl_csv <- "~/dsl/120725_DSL_frame.csv"
dsl_df <- read.csv(dsl_csv, header = TRUE) |>
  mutate(
    priority = factor(
      priority,
      levels = c(
        "Needs Triage", "Lowest", "Low", "Medium", "High", "Unbreak Now!"
      )
    )
  )
# Share of tasks whose author also closed them, per creation week.
# Rows are counted per (week_index, author_closer, source) and then turned
# into a proportion within each (week_index, source) cell, so the
# author_closer values of one week/source sum to 1.
ac_summary <- dsl_df |>
  group_by(week_index, author_closer, source) |>
  summarise(count = n(), .groups = "drop") |>
  group_by(week_index, source) |>
  mutate(proportion = count / sum(count)) |>
  ungroup()

# Smoothed trend plus the raw weekly points, one facet row per source.
# `color` and `fill` are mapped to the same variable; both legend titles are
# set to the same string so ggplot2 merges them into a single legend instead
# of drawing a separate, default-titled `fill` legend.
ac_legend_title <- "Did the Task Author also Close the Task?"
ac_summary |>
  ggplot(
    aes(
      x = week_index,
      y = proportion,
      color = author_closer,
      fill = author_closer,
      group = author_closer
    )
  ) +
  geom_smooth() +
  scale_color_viridis_d(option = "turbo") +
  scale_fill_viridis_d(option = "turbo") +
  facet_grid(source ~ ., scales = "free_y") +
  geom_point() +
  labs(
    title = "Did the Task Author also close the task? by week created",
    x = "Weeks from feature deployment",
    y = "% of work-items",
    color = ac_legend_title,
    fill = ac_legend_title
  ) +
  theme_minimal()
# % of EP
# Scatter plus smoothed trend of `olmo_EP_prop_adac` against creation week,
# coloured by `isAuthorWMF` and faceted by source.
# NOTE(review): presumably `olmo_EP_prop_adac` is the per-task share of
# sentences tagged 'Existent Problems' -- confirm against the data dictionary.
ggplot(
  dsl_df,
  aes(x = week_index, y = olmo_EP_prop_adac, color = isAuthorWMF)
) +
  facet_grid(source ~ .) +
  geom_point() +
  geom_smooth() +
  scale_color_viridis_d() +
  theme_minimal() +
  labs(
    x = "Weeks from Release",
    # Fixed label typo: a space was missing before the quoted tag name.
    y = "% of sentences machine-tagged as 'Existent Problems'",
    title = "Proportion of 'Existent Problems' tags over time"
  )
# TTR by affiliation and priority
# Weekly boxplots of time to resolution for the three priorities of interest,
# limited to tasks created from week -26 onwards, faceted by source.
ttr_df <- dsl_df |>
  filter(
    priority %in% c("Needs Triage", "Unbreak Now!", "High"),
    week_index >= -26
  ) |>
  mutate(week_factor = as.factor(week_index))

# The x axis is discrete, so a vertical line must be placed by level position.
# Look up where week 0 actually sits instead of assuming it is the 27th level,
# which only holds when every week from -26 to 0 is present after filtering.
release_pos <- match("0", levels(ttr_df$week_factor))
release_marker <- if (is.na(release_pos)) {
  NULL # no week-0 level to mark; adding NULL to a ggplot is a no-op
} else {
  geom_vline(
    xintercept = release_pos,
    linetype = "dashed",
    color = "black",
    linewidth = 0.5
  )
}

ggplot(
  ttr_df,
  aes(
    x = week_factor,
    # presumably TTR is recorded in hours (168 h = 1 week) -- TODO confirm
    y = TTR / 168,
    color = priority,
    fill = priority,
    linetype = isAuthorWMF
  )
) +
  facet_grid(source ~ .) +
  geom_boxplot(outlier.shape = NA) +
  theme_minimal() +
  # Zoom without dropping data, so box statistics are computed on all rows.
  coord_cartesian(ylim = c(0, 156)) +
  release_marker +
  scale_color_viridis_d() +
  labs(
    x = "Weeks from Release",
    y = "Time to Resolution (weeks)",
    title = "TTR by Task Creation Date and Triage Priority"
  )
# Share of tasks in each status (declined among them) per creation week.
# Rows are counted per (week_index, task_status, source) and then turned into
# a proportion within each (week_index, source) cell.
declined_summary <- dsl_df |>
  group_by(week_index, task_status, source) |>
  summarise(count = n(), .groups = "drop") |>
  group_by(week_index, source) |>
  mutate(proportion = count / sum(count)) |>
  ungroup()

# Smoothed trend plus the raw weekly points, one facet row per source.
# `color` and `fill` share one legend title so ggplot2 merges them into a
# single legend rather than adding a second, default-titled `fill` legend.
declined_summary |>
  ggplot(
    aes(
      x = week_index,
      y = proportion,
      color = task_status,
      fill = task_status,
      group = task_status
    )
  ) +
  geom_smooth() +
  scale_color_viridis_d(option = "turbo") +
  scale_fill_viridis_d(option = "turbo") +
  facet_grid(source ~ ., scales = "free_y") +
  geom_point() +
  labs(
    title = "Task status for new tasks by week created",
    x = "Weeks from feature deployment",
    y = "% of items in status",
    color = "Task Status",
    fill = "Task Status"
  ) +
  theme_minimal()
# count of gerrit links to operations, extensions, or core
# Rows with a non-missing `gerrit_repo` are bucketed by substring match:
# "extensions" -> extensions; "operations" or "integration" -> operations;
# everything else -> core. Rows are then counted per week, bucket and source.
gerrit_summary <- dsl_df |>
  filter(!is.na(gerrit_repo)) |>
  mutate(
    focal_repo = case_when(
      str_detect(gerrit_repo, "extensions") ~ "extensions",
      str_detect(gerrit_repo, "operations") ~ "operations",
      str_detect(gerrit_repo, "integration") ~ "operations",
      TRUE ~ "core"
    )
  ) |>
  group_by(week_index, focal_repo, source) |>
  summarise(count = n(), .groups = "drop")

# Smoothed trend plus the raw weekly counts, one facet row per source.
# The labels now describe this plot: they were previously copied from the
# task-status plot, although y is a raw count and the grouping is focal_repo.
# `color` and `fill` share one title so the legends merge.
gerrit_summary |>
  ggplot(
    aes(
      x = week_index,
      y = count,
      color = focal_repo,
      fill = focal_repo,
      group = focal_repo
    )
  ) +
  geom_smooth() +
  scale_color_viridis_d(option = "turbo") +
  scale_fill_viridis_d(option = "turbo") +
  facet_grid(source ~ ., scales = "free_y") +
  geom_point() +
  labs(
    title = "Gerrit-linked items by repository group and week created",
    x = "Weeks from feature deployment",
    y = "Number of items with a Gerrit link",
    color = "Repository group",
    fill = "Repository group"
  ) +
  theme_minimal()

View File

@ -0,0 +1,17 @@
1. Open an SSH tunnel from your workstation using the following command:
ssh -N -L 8787:n3439:48979 mjilg@klone.hyak.uw.edu
and point your web browser to http://localhost:8787
2. Log in to RStudio Server using the following credentials:
user: mjilg
password: eTkReY42xI/86STHG+4f
When done using RStudio Server, terminate the job by:
1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window)
2. Issue the following command on the login node:
scancel -f 31713063