1
0

analysis and plots for draft results section

This commit is contained in:
Matthew Gaughan 2025-12-08 10:53:07 -08:00
parent c010e9f9cf
commit e7e1bb3458
8 changed files with 151 additions and 40 deletions

34
analysis_data/scratch.R Normal file
View File

@ -0,0 +1,34 @@
library(tidyverse)
# Scratch analysis of the unified task/comment export:
#   1. tabulate WMF affiliation among authors who closed their own task;
#   2. plot how many authors file their first task in each week, per case.
main_csv <- "~/analysis_data/120725_unified.csv"
main_df <- read.csv(main_csv, header = TRUE)

# Task descriptions whose author is also flagged as the closer. The flag is
# compared against the string "True", not a logical TRUE.
# NOTE(review): presumably the CSV stores booleans as text -- confirm against
# the export step.
author_closer <- main_df |>
  filter(
    comment_type == "task_description",
    author_closer == "True"
  )
table(author_closer$isAuthorWMF)

# First collapse to one row per (source, author) holding the earliest week in
# which that author opened a task, then count how many authors share each
# first week within each source. Groups are dropped explicitly so the result
# is an ungrouped table and summarise() emits no regrouping message.
new_authors_summary <- main_df |>
  filter(comment_type == "task_description") |>
  group_by(source, AuthorPHID) |>
  summarise(
    first_task = min(week_index),
    .groups = "drop"
  ) |>
  count(first_task, source, name = "new_authors_count")

# One facet per case. The bars show the number of first-time authors, so the
# axis labels and title describe authors rather than tasks.
ggplot(new_authors_summary, aes(x = first_task, y = new_authors_count)) +
  facet_grid(
    source ~ .,
    scales = "free_y",
    labeller = labeller(source = c(
      "c1" = "VisualEditor",
      "c2" = "HTTPS-login",
      "c3" = "HTTP-deprecation"
    ))
  ) +
  geom_col() +
  labs(
    x = "Week of author's first task",
    y = "Number of new authors",
    title = "New task authors by week of first task"
  ) +
  theme_minimal()

Binary file not shown.

After

Width:  |  Height:  |  Size: 298 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 331 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 352 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 312 KiB

View File

@ -16,6 +16,38 @@ data_summary <- dsl_df %>%
ungroup()
library(ggdist)
# Restrict the weekly summary to the three triage priorities shown in the
# figure.
priority_plot_summary <- data_summary |>
  filter(priority %in% c("Needs Triage", "Unbreak Now!", "High"))

# Dodged bars of weekly priority proportions, one facet per case, with a dashed
# line at week 0 and a text label placed in the c1 facet at week 6.
# NOTE(review): the label sits at y = 0.6, which suggests `proportion` is a
# 0-1 fraction while the axis title says "%" -- confirm the intended units.
priority_plot <- priority_plot_summary |>
  ggplot(aes(x = week_index, y = proportion, fill = priority)) +
  facet_grid(
    source ~ .,
    scales = "free_y",
    labeller = labeller(source = c(
      "c1" = "VisualEditor",
      "c2" = "HTTPS-login",
      "c3" = "HTTP-deprecation"
    ))
  ) +
  geom_col(position = position_dodge(width = 0.9), width = 0.8) +
  # Only `fill` is mapped, so a colour scale would have no effect here.
  scale_fill_viridis_d(option = "turbo") +
  geom_vline(
    xintercept = 0,
    linetype = "dashed",
    color = "black",
    linewidth = 0.5
  ) +
  geom_text(
    # Keep a single row: the subset can hold one row per priority, and each
    # row would draw the same label again on top of itself. The row still
    # carries `priority`, which the inherited `fill` mapping needs.
    data = head(
      subset(priority_plot_summary, source == "c1" & week_index == 6),
      1
    ),
    aes(x = week_index, y = 0.6, label = "Opt-out deployment"),
    size = 2.5
  ) +
  labs(
    title = "Triage priority proportions for new tasks by week created",
    x = "Weeks from feature deployment",
    y = "% of items tagged",
    fill = "Priority Tag"
  ) +
  theme_minimal() +
  theme(legend.position = "top")
priority_plot

# Write the figure to the working directory at print resolution.
ggsave(
  filename = "120825_triage_priority.png",
  plot = priority_plot,
  width = 12, # inches
  height = 4, # inches
  dpi = 600 # high resolution
)
data_summary|>
filter(priority == "Needs Triage" |
@ -27,33 +59,51 @@ data_summary|>
scale_fill_viridis_d(option='turbo') +
facet_grid(source ~ ., scales = "free_y") +
geom_point() +
labs(title = "Triage priority proportions for new tasks by week created",
labs(title = "Proportions of Triage Priority by Week",
x = "Weeks from feature deployment",
y = "% of items tagged",
color = "Priority Tag") +
theme_minimal()
dsl_df |>
c1_ttr_plot <- dsl_df |>
filter(priority == "Needs Triage" |
priority == "Unbreak Now!" |
priority == "High") |>
filter(week_index >= -26) |>
filter(source == 'c1') |>
ggplot(
aes(
x=as.factor(week_index),
y= TTR/168,
color=priority,
fill=priority
)
) +
facet_grid(source ~ .) +
facet_grid(source ~ .,
scales = "free_y",
labeller = labeller(source = c("c1" = "VisualEditor",
"c2" = "HTTPS-login",
"c3" = "HTTP-deprecation"))) +
geom_boxplot(outlier.shape = NA) +
theme_minimal() +
coord_cartesian(ylim = c(0, 112)) +
coord_cartesian(ylim = c(0, 52)) +
geom_vline(xintercept =27, linetype = "dashed", color = "black", linewidth = 0.5) +
scale_color_viridis_d() +
labs(x = "Weeks from Release", y = "Time to Resolution (weeks)", title = "TTR by Task Creation Date and Triage Priority")
scale_fill_viridis_d(option='turbo') +
geom_text(
aes(x= 25, y=45, label='Opt-out deployment'),
size = 4) +
labs(x = "Weeks from Release",
y = "Time to Resolution (weeks)",
fill = "Priority Tag",
title = "VisualEditor Time to Resolution by Triage Priority") +
theme(legend.position = "top")
c1_ttr_plot
ggsave(
filename = "120825_c1_ttr.png",
plot = c1_ttr_plot,
width = 12, # inches
height = 4, # inches
dpi = 600 # high resolution
)
dsl_df_long <- dsl_df %>%
pivot_longer(
@ -101,7 +151,7 @@ weekly_summary <- dsl_df |>
median_comments_before_resolution = median(n_comments_before)
)
ggplot(
tasks_created <- ggplot(
weekly_summary,
aes(
x=week_index,
@ -109,7 +159,11 @@ ggplot(
fill=isAuthorWMF
)
) +
facet_grid(source ~ ., scales = "free_y") +
facet_grid(source ~ .,
scales = "free_y",
labeller = labeller(source = c("c1" = "VisualEditor",
"c2" = "HTTPS-login",
"c3" = "HTTP-deprecation"))) +
geom_col(position = position_dodge(width = 0.9), width = 0.8) +
geom_vline(data = weekly_summary |> filter(source == "c1"),
aes(xintercept = -29),
@ -133,9 +187,35 @@ ggplot(
aes(xintercept = -3),
linetype = "3313", color = "black", linewidth = 0.5) +
geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.5) +
geom_text(
data = subset(weekly_summary, source == "c1" & week_index == 6),
aes(x=week_index, y=120, label='Opt-out deployment'),
size = 2.5) +
geom_text(
data = subset(weekly_summary, source == "c1" & week_index == -33),
aes(x=week_index, y=120, label='Opt-in Testing'),
size = 2.5) +
geom_text(
data = subset(weekly_summary, source == "c2" & week_index == -12),
aes(x=week_index, y=20, label='Deployment Announcement'),
size = 2.5) +
theme_minimal() +
scale_fill_viridis_d()
scale_fill_viridis_d() +
labs(
x = "Weeks from Feature Deployment",
y = "Count of Tasks Created",
title = "Phabricator Tasks Created by Week and Author Affiliation",
fill = "Task Author Affiliated with WMF?"
) +
theme(legend.position = "top")
tasks_created
ggsave(
filename = "120825_tasks_created.png",
plot = tasks_created,
width = 12, # inches
height = 4, # inches
dpi = 600 # high resolution
)
outcome_summary <- dsl_df |>
group_by(source, isAuthorWMF)|>

View File

@ -79,19 +79,33 @@ declined_summary <- dsl_df %>%
mutate(proportion = count / sum(count)) %>%
ungroup()
declined_summary|>
ggplot(aes(x = week_index, y = proportion, color = task_status, fill=task_status, group = task_status)) +
geom_smooth()+
scale_color_viridis_d(option='turbo') +
scale_fill_viridis_d(option='turbo') +
facet_grid(source ~ ., scales = "free_y") +
geom_point() +
labs(title = "Task status for new tasks by week created",
task_status_plot <- declined_summary|>
ggplot(aes(x = week_index, y = proportion, fill=task_status,)) +
facet_grid(source ~ .,
scales = "free_y",
labeller = labeller(source = c("c1" = "VisualEditor",
"c2" = "HTTPS-login",
"c3" = "HTTP-deprecation"))) +
geom_col(position = position_dodge(width = 0.9), width = 0.8) +
scale_fill_viridis_d(option='magma') +
labs(title = "Task Status (as of February 28, 2025) by Week",
x = "Weeks from feature deployment",
y = "% of items in status",
color = "Task Status") +
theme_minimal()
fill = "Task Status") +
geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.5) +
geom_text(
data = subset(declined_summary, source == "c1" & week_index == 6),
aes(x=week_index, y=0.9, label='Opt-out deployment'),
size = 2.5) +
theme_minimal() +
theme(legend.position = "top")
ggsave(
filename = "120825_tasks_status.png",
plot = task_status_plot,
width = 12, # inches
height = 4, # inches
dpi = 600 # high resolution
)

View File

@ -1,17 +0,0 @@
1. SSH tunnel from your workstation using the following command:
ssh -N -L 8787:n3439:48979 mjilg@klone.hyak.uw.edu
and point your web browser to http://localhost:8787
2. Log in to RStudio Server using the following credentials:
user: mjilg
password: [REDACTED - session credentials should not be committed to version control]
When done using RStudio Server, terminate the job by:
1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window)
2. Issue the following command on the login node:
scancel -f 31713063