diff --git a/analysis_data/scratch.R b/analysis_data/scratch.R new file mode 100644 index 0000000..3263733 --- /dev/null +++ b/analysis_data/scratch.R @@ -0,0 +1,34 @@ +library(tidyverse) +main_csv <-"~/analysis_data/120725_unified.csv" +main_df <- read.csv(main_csv, header = TRUE) + +author_closer <- main_df |> + filter(comment_type == "task_description") |> + filter(author_closer == "True") +table(author_closer$isAuthorWMF) + + +new_authors_summary <- main_df |> + filter(comment_type == "task_description") |> + group_by(source, AuthorPHID) |> + summarise( + task_count = n(), + first_task = min(week_index) + ) |> + group_by(first_task, source) |> + summarise( + new_authors_count = n() + ) +ggplot(new_authors_summary, aes(x = first_task, y = new_authors_count)) + + facet_grid(source ~ ., + scales = "free_y", + labeller = labeller(source = c("c1" = "VisualEditor", + "c2" = "HTTPS-login", + "c3" = "HTTP-deprecation"))) + + geom_col() + + labs( + x = "Date of first task", + y = "Number of tasks created", + title = "Task count by Author's first task date" + ) + + theme_minimal() diff --git a/doc_plots/rq1_plots/120825_c1_ttr.png b/doc_plots/rq1_plots/120825_c1_ttr.png new file mode 100644 index 0000000..3971b7b Binary files /dev/null and b/doc_plots/rq1_plots/120825_c1_ttr.png differ diff --git a/doc_plots/rq1_plots/120825_tasks_created.png b/doc_plots/rq1_plots/120825_tasks_created.png new file mode 100644 index 0000000..539d6e1 Binary files /dev/null and b/doc_plots/rq1_plots/120825_tasks_created.png differ diff --git a/doc_plots/rq1_plots/120825_tasks_status.png b/doc_plots/rq1_plots/120825_tasks_status.png new file mode 100644 index 0000000..fb240bf Binary files /dev/null and b/doc_plots/rq1_plots/120825_tasks_status.png differ diff --git a/doc_plots/rq1_plots/120825_triage_priority.png b/doc_plots/rq1_plots/120825_triage_priority.png new file mode 100644 index 0000000..76b1ba8 Binary files /dev/null and b/doc_plots/rq1_plots/120825_triage_priority.png differ diff --git a/dsl/final_bivariate.R b/dsl/final_bivariate.R index 3e8f284..383bf80 100644 --- a/dsl/final_bivariate.R +++ b/dsl/final_bivariate.R @@ -16,6 +16,38 @@ data_summary <- dsl_df %>% ungroup() library(ggdist) +priority_plot_summary <- data_summary |> + filter(priority == "Needs Triage" | + priority == "Unbreak Now!" | + priority == "High") +priority_plot <- priority_plot_summary |> + ggplot(aes(x = week_index, y = proportion, fill = priority)) + + facet_grid(source ~ ., + scales = "free_y", + labeller = labeller(source = c("c1" = "VisualEditor", + "c2" = "HTTPS-login", + "c3" = "HTTP-deprecation"))) + + geom_col(position = position_dodge(width = 0.9), width = 0.8) + + scale_color_viridis_d(option='turbo') + + scale_fill_viridis_d(option='turbo') + + geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.5) + + geom_text( + data = subset(priority_plot_summary, source == "c1" & week_index == 6), + aes(x=week_index, y=0.6, label='Opt-out deployment'), + size = 2.5) + + labs(title = "Triage priority proportions for new tasks by week created", + x = "Weeks from feature deployment", + y = "% of items tagged", + fill = "Priority Tag") + + theme_minimal() + theme(legend.position = "top") +priority_plot +ggsave( + filename = "120825_triage_priority.png", + plot = priority_plot, + width = 12, # inches + height = 4, # inches + dpi = 600 # high resolution +) data_summary|> filter(priority == "Needs Triage" | @@ -27,33 +59,51 @@ data_summary|> scale_fill_viridis_d(option='turbo') + facet_grid(source ~ ., scales = "free_y") + geom_point() + - labs(title = "Triage priority proportions for new tasks by week created", + labs(title = "Proportions of Triage Priority by Week", x = "Weeks from feature deployment", y = "% of items tagged", color = "Priority Tag") + theme_minimal() -dsl_df |> +c1_ttr_plot <- dsl_df |> filter(priority == "Needs Triage" | priority == "Unbreak Now!" | priority == "High") |> filter(week_index >= -26) |> + filter(source == 'c1') |> ggplot( aes( x=as.factor(week_index), y= TTR/168, - color=priority, fill=priority ) ) + - facet_grid(source ~ .) + + facet_grid(source ~ ., + scales = "free_y", + labeller = labeller(source = c("c1" = "VisualEditor", + "c2" = "HTTPS-login", + "c3" = "HTTP-deprecation"))) + geom_boxplot(outlier.shape = NA) + theme_minimal() + - coord_cartesian(ylim = c(0, 112)) + + coord_cartesian(ylim = c(0, 52)) + geom_vline(xintercept =27, linetype = "dashed", color = "black", linewidth = 0.5) + - scale_color_viridis_d() + - labs(x = "Weeks from Release", y = "Time to Resolution (weeks)", title = "TTR by Task Creation Date and Triage Priority") - + scale_fill_viridis_d(option='turbo') + + geom_text( + aes(x= 25, y=45, label='Opt-out deployment'), + size = 4) + + labs(x = "Weeks from Release", + y = "Time to Resolution (weeks)", + fill = "Priority Tag", + title = "VisualEditor Time to Resolution by Triage Priority") + + theme(legend.position = "top") +c1_ttr_plot +ggsave( + filename = "120825_c1_ttr.png", + plot = c1_ttr_plot, + width = 12, # inches + height = 4, # inches + dpi = 600 # high resolution +) dsl_df_long <- dsl_df %>% pivot_longer( @@ -101,7 +151,7 @@ weekly_summary <- dsl_df |> median_comments_before_resolution = median(n_comments_before) ) -ggplot( +tasks_created <- ggplot( weekly_summary, aes( x=week_index, @@ -109,7 +159,11 @@ ggplot( fill=isAuthorWMF ) ) + - facet_grid(source ~ ., scales = "free_y") + + facet_grid(source ~ ., + scales = "free_y", + labeller = labeller(source = c("c1" = "VisualEditor", + "c2" = "HTTPS-login", + "c3" = "HTTP-deprecation"))) + geom_col(position = position_dodge(width = 0.9), width = 0.8) + geom_vline(data = weekly_summary |> filter(source == "c1"), aes(xintercept = -29), @@ -133,9 +187,35 @@ ggplot( aes(xintercept = -3), linetype = "3313", color = "black", linewidth = 0.5) + geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.5) + + geom_text( + data = subset(weekly_summary, source == "c1" & week_index == 6), + aes(x=week_index, y=120, label='Opt-out deployment'), + size = 2.5) + + geom_text( + data = subset(weekly_summary, source == "c1" & week_index == -33), + aes(x=week_index, y=120, label='Opt-in Testing'), + size = 2.5) + + geom_text( + data = subset(weekly_summary, source == "c2" & week_index == -12), + aes(x=week_index, y=20, label='Deployment Announcement'), + size = 2.5) + theme_minimal() + - scale_fill_viridis_d() - + scale_fill_viridis_d() + + labs( + x = "Weeks from Feature Deployment", + y = "Count of Tasks Created", + title = "Phabricator Tasks Created by Week and Author Affiliation", + fill = "Task Author Affiliated with WMF?" + ) + + theme(legend.position = "top") +tasks_created +ggsave( + filename = "120825_tasks_created.png", + plot = tasks_created, + width = 12, # inches + height = 4, # inches + dpi = 600 # high resolution +) outcome_summary <- dsl_df |> group_by(source, isAuthorWMF)|> diff --git a/dsl/rq1_plots.R b/dsl/rq1_plots.R index ca80b31..20aca6b 100644 --- a/dsl/rq1_plots.R +++ b/dsl/rq1_plots.R @@ -79,19 +79,33 @@ declined_summary <- dsl_df %>% mutate(proportion = count / sum(count)) %>% ungroup() -declined_summary|> - ggplot(aes(x = week_index, y = proportion, color = task_status, fill=task_status, group = task_status)) + - geom_smooth()+ - scale_color_viridis_d(option='turbo') + - scale_fill_viridis_d(option='turbo') + - facet_grid(source ~ ., scales = "free_y") + - geom_point() + - labs(title = "Task status for new tasks by week created", +task_status_plot <- declined_summary|> + ggplot(aes(x = week_index, y = proportion, fill=task_status,)) + + facet_grid(source ~ ., + scales = "free_y", + labeller = labeller(source = c("c1" = "VisualEditor", + "c2" = "HTTPS-login", + "c3" = "HTTP-deprecation"))) + + geom_col(position = position_dodge(width = 0.9), width = 0.8) + + scale_fill_viridis_d(option='magma') + + labs(title = "Task Status (as of February 28, 2025) by Week", x = "Weeks from feature deployment", y = "% of items in status", - color = "Task Status") + - theme_minimal() - + fill = "Task Status") + + geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.5) + + geom_text( + data = subset(declined_summary, source == "c1" & week_index == 6), + aes(x=week_index, y=0.9, label='Opt-out deployment'), + size = 2.5) + + theme_minimal() + + theme(legend.position = "top") +ggsave( + filename = "120825_tasks_status.png", + plot = task_status_plot, + width = 12, # inches + height = 4, # inches + dpi = 600 # high resolution +) diff --git a/mgaughan-rstudio-server_31713063.out b/mgaughan-rstudio-server_31713063.out deleted file mode 100644 index cc3b071..0000000 --- a/mgaughan-rstudio-server_31713063.out +++ /dev/null @@ -1,17 +0,0 @@ -1. SSH tunnel from your workstation using the following command: - - ssh -N -L 8787:n3439:48979 mjilg@klone.hyak.uw.edu - - and point your web browser to http://localhost:8787 - -2. log in to RStudio Server using the following credentials: - - user: mjilg - password: eTkReY42xI/86STHG+4f - -When done using RStudio Server, terminate the job by: - -1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window) -2. Issue the following command on the login node: - - scancel -f 31713063