analysis and plots for draft results section
This commit is contained in:
parent
c010e9f9cf
commit
e7e1bb3458
34
analysis_data/scratch.R
Normal file
34
analysis_data/scratch.R
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
library(tidyverse)
|
||||||
|
# Location of the unified CSV export that the analyses below read from.
main_csv <- "~/analysis_data/120725_unified.csv"

# Load the export into a base data.frame; read.csv() treats the first row as
# the header by default, so the argument is not repeated here.
main_df <- read.csv(file = main_csv)
|
||||||
|
|
||||||
|
# Keep only task-description rows whose `author_closer` column equals the
# string "True" (the column is compared as text, not as a logical), then
# tabulate those rows by the `isAuthorWMF` column.
author_closer <- main_df |>
  filter(
    comment_type == "task_description",
    author_closer == "True"
  )

table(author_closer$isAuthorWMF)
|
||||||
|
|
||||||
|
|
||||||
|
# Count, per source and week, how many authors filed their first task then.
#
# Step 1: collapse task-description rows to one row per (source, AuthorPHID),
#         recording how many tasks that author filed and the smallest
#         week_index in which they filed one.
# Step 2: count the authors that share each (first_task, source) pair.
#
# `.groups = "drop"` is passed to both summarise() calls so neither the
# intermediate nor the final tibble carries residual grouping. Without it,
# summarise() silently keeps the leading grouping variable and prints a
# "has grouped output by ..." message on every run.
new_authors_summary <- main_df |>
  filter(comment_type == "task_description") |>
  group_by(source, AuthorPHID) |>
  summarise(
    task_count = n(),
    first_task = min(week_index),
    .groups = "drop"
  ) |>
  group_by(first_task, source) |>
  summarise(
    new_authors_count = n(),
    .groups = "drop"
  )
|
||||||
|
# Bar chart of first-time task authors per week, one facet row per source.
# The bars show `new_authors_count` (authors whose earliest task falls in that
# week_index), so the axis labels and title describe authors rather than
# tasks, and the x axis is described as a week index rather than a date.
ggplot(new_authors_summary, aes(x = first_task, y = new_authors_count)) +
  facet_grid(
    source ~ .,
    scales = "free_y", # each source gets its own y range
    labeller = labeller(
      source = c(
        "c1" = "VisualEditor",
        "c2" = "HTTPS-login",
        "c3" = "HTTP-deprecation"
      )
    )
  ) +
  geom_col() +
  labs(
    x = "Week index of author's first task",
    y = "Number of new task authors",
    title = "New task authors by week of first task"
  ) +
  theme_minimal()
|
||||||
BIN
doc_plots/rq1_plots/120825_c1_ttr.png
Normal file
BIN
doc_plots/rq1_plots/120825_c1_ttr.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 298 KiB |
BIN
doc_plots/rq1_plots/120825_tasks_created.png
Normal file
BIN
doc_plots/rq1_plots/120825_tasks_created.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 331 KiB |
BIN
doc_plots/rq1_plots/120825_tasks_status.png
Normal file
BIN
doc_plots/rq1_plots/120825_tasks_status.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 352 KiB |
BIN
doc_plots/rq1_plots/120825_triage_priority.png
Normal file
BIN
doc_plots/rq1_plots/120825_triage_priority.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 312 KiB |
@ -16,6 +16,38 @@ data_summary <- dsl_df %>%
|
|||||||
ungroup()
|
ungroup()
|
||||||
|
|
||||||
library(ggdist)
|
library(ggdist)
|
||||||
|
# Subset of the weekly summary limited to the three priority tags that the
# triage-priority figure displays.
priority_plot_summary <- data_summary |>
  filter(priority %in% c("Needs Triage", "Unbreak Now!", "High"))
|
||||||
|
# Dodged bar chart of `proportion` by week_index, filled by priority tag, with
# one facet row per source.
#
# Changes from the previous version:
#   * the colour scale was dropped: no layer maps a colour aesthetic, so it
#     had no effect on the figure;
#   * the annotation layer sets check_overlap = TRUE. Its data subset holds
#     one row per priority for (c1, week 6), so the identical label used to be
#     drawn once per row on top of itself.
# NOTE(review): the y axis title says "%" while the plotted column is named
# `proportion`; if that column is a 0-1 fraction the tick labels will not be
# percentages -- confirm against how data_summary is built.
priority_plot <- priority_plot_summary |>
  ggplot(aes(x = week_index, y = proportion, fill = priority)) +
  facet_grid(
    source ~ .,
    scales = "free_y",
    labeller = labeller(
      source = c(
        "c1" = "VisualEditor",
        "c2" = "HTTPS-login",
        "c3" = "HTTP-deprecation"
      )
    )
  ) +
  geom_col(position = position_dodge(width = 0.9), width = 0.8) +
  scale_fill_viridis_d(option = "turbo") +
  # Dashed reference line at week 0 in every facet.
  geom_vline(
    xintercept = 0,
    linetype = "dashed",
    color = "black",
    linewidth = 0.5
  ) +
  # Label shown only in the c1 facet, anchored at week 6.
  geom_text(
    data = subset(priority_plot_summary, source == "c1" & week_index == 6),
    aes(x = week_index, y = 0.6, label = "Opt-out deployment"),
    size = 2.5,
    check_overlap = TRUE
  ) +
  labs(
    title = "Triage priority proportions for new tasks by week created",
    x = "Weeks from feature deployment",
    y = "% of items tagged",
    fill = "Priority Tag"
  ) +
  theme_minimal() +
  theme(legend.position = "top")

priority_plot
|
||||||
|
# Save the triage-priority figure as a PNG (path is relative, so it lands in
# the current working directory): 12 x 4 inch canvas at 600 dpi.
ggsave(
  "120825_triage_priority.png",
  plot = priority_plot,
  dpi = 600,
  height = 4,
  width = 12
)
|
||||||
|
|
||||||
data_summary|>
|
data_summary|>
|
||||||
filter(priority == "Needs Triage" |
|
filter(priority == "Needs Triage" |
|
||||||
@ -27,33 +59,51 @@ data_summary|>
|
|||||||
scale_fill_viridis_d(option='turbo') +
|
scale_fill_viridis_d(option='turbo') +
|
||||||
facet_grid(source ~ ., scales = "free_y") +
|
facet_grid(source ~ ., scales = "free_y") +
|
||||||
geom_point() +
|
geom_point() +
|
||||||
labs(title = "Triage priority proportions for new tasks by week created",
|
labs(title = "Proportions of Triage Priority by Week",
|
||||||
x = "Weeks from feature deployment",
|
x = "Weeks from feature deployment",
|
||||||
y = "% of items tagged",
|
y = "% of items tagged",
|
||||||
color = "Priority Tag") +
|
color = "Priority Tag") +
|
||||||
theme_minimal()
|
theme_minimal()
|
||||||
|
|
||||||
dsl_df |>
|
c1_ttr_plot <- dsl_df |>
|
||||||
filter(priority == "Needs Triage" |
|
filter(priority == "Needs Triage" |
|
||||||
priority == "Unbreak Now!" |
|
priority == "Unbreak Now!" |
|
||||||
priority == "High") |>
|
priority == "High") |>
|
||||||
filter(week_index >= -26) |>
|
filter(week_index >= -26) |>
|
||||||
|
filter(source == 'c1') |>
|
||||||
ggplot(
|
ggplot(
|
||||||
aes(
|
aes(
|
||||||
x=as.factor(week_index),
|
x=as.factor(week_index),
|
||||||
y= TTR/168,
|
y= TTR/168,
|
||||||
color=priority,
|
|
||||||
fill=priority
|
fill=priority
|
||||||
)
|
)
|
||||||
) +
|
) +
|
||||||
facet_grid(source ~ .) +
|
facet_grid(source ~ .,
|
||||||
|
scales = "free_y",
|
||||||
|
labeller = labeller(source = c("c1" = "VisualEditor",
|
||||||
|
"c2" = "HTTPS-login",
|
||||||
|
"c3" = "HTTP-deprecation"))) +
|
||||||
geom_boxplot(outlier.shape = NA) +
|
geom_boxplot(outlier.shape = NA) +
|
||||||
theme_minimal() +
|
theme_minimal() +
|
||||||
coord_cartesian(ylim = c(0, 112)) +
|
coord_cartesian(ylim = c(0, 52)) +
|
||||||
geom_vline(xintercept =27, linetype = "dashed", color = "black", linewidth = 0.5) +
|
geom_vline(xintercept =27, linetype = "dashed", color = "black", linewidth = 0.5) +
|
||||||
scale_color_viridis_d() +
|
scale_fill_viridis_d(option='turbo') +
|
||||||
labs(x = "Weeks from Release", y = "Time to Resolution (weeks)", title = "TTR by Task Creation Date and Triage Priority")
|
geom_text(
|
||||||
|
aes(x= 25, y=45, label='Opt-out deployment'),
|
||||||
|
size = 4) +
|
||||||
|
labs(x = "Weeks from Release",
|
||||||
|
y = "Time to Resolution (weeks)",
|
||||||
|
fill = "Priority Tag",
|
||||||
|
title = "VisualEditor Time to Resolution by Triage Priority") +
|
||||||
|
theme(legend.position = "top")
|
||||||
|
# Show the c1 time-to-resolution figure, then save it as a PNG (relative
# path, so it lands in the current working directory): 12 x 4 inches, 600 dpi.
c1_ttr_plot

ggsave(
  "120825_c1_ttr.png",
  plot = c1_ttr_plot,
  dpi = 600,
  height = 4,
  width = 12
)
|
||||||
|
|
||||||
dsl_df_long <- dsl_df %>%
|
dsl_df_long <- dsl_df %>%
|
||||||
pivot_longer(
|
pivot_longer(
|
||||||
@ -101,7 +151,7 @@ weekly_summary <- dsl_df |>
|
|||||||
median_comments_before_resolution = median(n_comments_before)
|
median_comments_before_resolution = median(n_comments_before)
|
||||||
)
|
)
|
||||||
|
|
||||||
ggplot(
|
tasks_created <- ggplot(
|
||||||
weekly_summary,
|
weekly_summary,
|
||||||
aes(
|
aes(
|
||||||
x=week_index,
|
x=week_index,
|
||||||
@ -109,7 +159,11 @@ ggplot(
|
|||||||
fill=isAuthorWMF
|
fill=isAuthorWMF
|
||||||
)
|
)
|
||||||
) +
|
) +
|
||||||
facet_grid(source ~ ., scales = "free_y") +
|
facet_grid(source ~ .,
|
||||||
|
scales = "free_y",
|
||||||
|
labeller = labeller(source = c("c1" = "VisualEditor",
|
||||||
|
"c2" = "HTTPS-login",
|
||||||
|
"c3" = "HTTP-deprecation"))) +
|
||||||
geom_col(position = position_dodge(width = 0.9), width = 0.8) +
|
geom_col(position = position_dodge(width = 0.9), width = 0.8) +
|
||||||
geom_vline(data = weekly_summary |> filter(source == "c1"),
|
geom_vline(data = weekly_summary |> filter(source == "c1"),
|
||||||
aes(xintercept = -29),
|
aes(xintercept = -29),
|
||||||
@ -133,9 +187,35 @@ ggplot(
|
|||||||
aes(xintercept = -3),
|
aes(xintercept = -3),
|
||||||
linetype = "3313", color = "black", linewidth = 0.5) +
|
linetype = "3313", color = "black", linewidth = 0.5) +
|
||||||
geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.5) +
|
geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.5) +
|
||||||
|
geom_text(
|
||||||
|
data = subset(weekly_summary, source == "c1" & week_index == 6),
|
||||||
|
aes(x=week_index, y=120, label='Opt-out deployment'),
|
||||||
|
size = 2.5) +
|
||||||
|
geom_text(
|
||||||
|
data = subset(weekly_summary, source == "c1" & week_index == -33),
|
||||||
|
aes(x=week_index, y=120, label='Opt-in Testing'),
|
||||||
|
size = 2.5) +
|
||||||
|
geom_text(
|
||||||
|
data = subset(weekly_summary, source == "c2" & week_index == -12),
|
||||||
|
aes(x=week_index, y=20, label='Deployment Announcement'),
|
||||||
|
size = 2.5) +
|
||||||
theme_minimal() +
|
theme_minimal() +
|
||||||
scale_fill_viridis_d()
|
scale_fill_viridis_d() +
|
||||||
|
labs(
|
||||||
|
x = "Weeks from Feature Deployment",
|
||||||
|
y = "Count of Tasks Created",
|
||||||
|
title = "Phabricator Tasks Created by Week and Author Affiliation",
|
||||||
|
fill = "Task Author Affiliated with WMF?"
|
||||||
|
) +
|
||||||
|
theme(legend.position = "top")
|
||||||
|
# Show the tasks-created figure, then save it as a PNG (relative path, so it
# lands in the current working directory): 12 x 4 inches, 600 dpi.
tasks_created

ggsave(
  "120825_tasks_created.png",
  plot = tasks_created,
  dpi = 600,
  height = 4,
  width = 12
)
|
||||||
|
|
||||||
outcome_summary <- dsl_df |>
|
outcome_summary <- dsl_df |>
|
||||||
group_by(source, isAuthorWMF)|>
|
group_by(source, isAuthorWMF)|>
|
||||||
|
|||||||
@ -79,19 +79,33 @@ declined_summary <- dsl_df %>%
|
|||||||
mutate(proportion = count / sum(count)) %>%
|
mutate(proportion = count / sum(count)) %>%
|
||||||
ungroup()
|
ungroup()
|
||||||
|
|
||||||
declined_summary|>
|
task_status_plot <- declined_summary|>
|
||||||
ggplot(aes(x = week_index, y = proportion, color = task_status, fill=task_status, group = task_status)) +
|
ggplot(aes(x = week_index, y = proportion, fill=task_status,)) +
|
||||||
geom_smooth()+
|
facet_grid(source ~ .,
|
||||||
scale_color_viridis_d(option='turbo') +
|
scales = "free_y",
|
||||||
scale_fill_viridis_d(option='turbo') +
|
labeller = labeller(source = c("c1" = "VisualEditor",
|
||||||
facet_grid(source ~ ., scales = "free_y") +
|
"c2" = "HTTPS-login",
|
||||||
geom_point() +
|
"c3" = "HTTP-deprecation"))) +
|
||||||
labs(title = "Task status for new tasks by week created",
|
geom_col(position = position_dodge(width = 0.9), width = 0.8) +
|
||||||
|
scale_fill_viridis_d(option='magma') +
|
||||||
|
labs(title = "Task Status (as of February 28, 2025) by Week",
|
||||||
x = "Weeks from feature deployment",
|
x = "Weeks from feature deployment",
|
||||||
y = "% of items in status",
|
y = "% of items in status",
|
||||||
color = "Task Status") +
|
fill = "Task Status") +
|
||||||
theme_minimal()
|
geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.5) +
|
||||||
|
geom_text(
|
||||||
|
data = subset(declined_summary, source == "c1" & week_index == 6),
|
||||||
|
aes(x=week_index, y=0.9, label='Opt-out deployment'),
|
||||||
|
size = 2.5) +
|
||||||
|
theme_minimal() +
|
||||||
|
theme(legend.position = "top")
|
||||||
|
# Save the task-status figure as a PNG (relative path, so it lands in the
# current working directory): 12 x 4 inch canvas at 600 dpi.
ggsave(
  "120825_tasks_status.png",
  plot = task_status_plot,
  dpi = 600,
  height = 4,
  width = 12
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -1,17 +0,0 @@
|
|||||||
1. SSH tunnel from your workstation using the following command:
|
|
||||||
|
|
||||||
ssh -N -L 8787:n3439:48979 mjilg@klone.hyak.uw.edu
|
|
||||||
|
|
||||||
and point your web browser to http://localhost:8787
|
|
||||||
|
|
||||||
2. log in to RStudio Server using the following credentials:
|
|
||||||
|
|
||||||
user: mjilg
|
|
||||||
password: eTkReY42xI/86STHG+4f
|
|
||||||
|
|
||||||
When done using RStudio Server, terminate the job by:
|
|
||||||
|
|
||||||
1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window)
|
|
||||||
2. Issue the following command on the login node:
|
|
||||||
|
|
||||||
scancel -f 31713063
|
|
||||||
Loading…
Reference in New Issue
Block a user