1
0

generated some more descriptive stats for Results

This commit is contained in:
Matthew Gaughan 2026-01-06 21:37:24 -08:00
parent c1dd41b095
commit 3cfe103730
3 changed files with 49 additions and 22 deletions

View File

@ -1,4 +1,5 @@
library(tidyverse) library(tidyverse)
library(dplyr)
main_csv <-"~/analysis_data/121625_unified.csv" main_csv <-"~/analysis_data/121625_unified.csv"
main_df <- read.csv(main_csv, header = TRUE) main_df <- read.csv(main_csv, header = TRUE)
@ -7,7 +8,7 @@ bz_summary <- main_df |>
AuthorPHID == "PHID-USER-idceizaw6elwiwm5xshb", TRUE, FALSE AuthorPHID == "PHID-USER-idceizaw6elwiwm5xshb", TRUE, FALSE
)) |> )) |>
group_by(source, comment_type, isBz)|> group_by(source, comment_type, isBz)|>
summarise(count = n(), .groups = 'drop') summarize(count = n(), .groups = 'drop')
needs_triage <- dsl_df |> needs_triage <- dsl_df |>
@ -34,7 +35,7 @@ changes<- dsl_df |>
filter(!is.na(period)) |> filter(!is.na(period)) |>
mutate(ttr_weeks = TTR/168) |> mutate(ttr_weeks = TTR/168) |>
group_by(source, period, priority) %>% group_by(source, period, priority) %>%
summarise( summarize(
count = n(), count = n(),
mean_ttr_weeks = mean(ttr_weeks, na.rm = TRUE), mean_ttr_weeks = mean(ttr_weeks, na.rm = TRUE),
sd_ttr_weeks = sd(ttr_weeks, na.rm = TRUE), sd_ttr_weeks = sd(ttr_weeks, na.rm = TRUE),
@ -45,7 +46,7 @@ changes<- dsl_df |>
first_task <- main_df |> first_task <- main_df |>
filter(comment_type == "task_description") |> filter(comment_type == "task_description") |>
group_by(source, AuthorPHID) |> group_by(source, AuthorPHID) |>
summarise( summarize(
task_count = n(), task_count = n(),
first_task_week = min(week_index) first_task_week = min(week_index)
) )
@ -56,7 +57,14 @@ tasks_flagged <- main_df %>%
mutate(is_first_time_author = week_index == first_task_week) mutate(is_first_time_author = week_index == first_task_week)
summary_df <- tasks_flagged %>% summary_df <- tasks_flagged %>%
group_by(week_index, source) %>% mutate(
period = case_when(
week_index >= -4 & week_index <= 4 ~ "8 weeks after announcement",
week_index >= -13 & week_index <= -5 ~ "8 weeks before deployment announcement",
TRUE ~ NA
)
) |>
group_by(period, source, isAuthorWMF) %>%
summarize( summarize(
total_tasks = n(), total_tasks = n(),
first_time_tasks = sum(is_first_time_author), first_time_tasks = sum(is_first_time_author),

View File

@ -3,6 +3,43 @@ library(tidyverse)
dsl_csv <-"~/dsl/121625_DSL_frame.csv" dsl_csv <-"~/dsl/121625_DSL_frame.csv"
dsl_df <- read.csv(dsl_csv, header = TRUE) dsl_df <- read.csv(dsl_csv, header = TRUE)
# Weekly TTR trajectory: one row per (week_index, isTriaged, source) holding
# the number of rows in that cell plus the mean and standard deviation of
# ttr_weeks.
ttr_trajectory <- dsl_df |>
  mutate(
    # 168 = 24 * 7; assumes TTR_hours is measured in hours, as its name
    # suggests -- TODO confirm against the DSL frame.
    ttr_weeks = TTR_hours / 168,
    # NOTE(review): if_else() propagates NA, so rows whose priority is missing
    # fall into a separate NA group rather than "Triaged" -- confirm intended.
    isTriaged = if_else(
      priority == "Needs Triage",
      "Not Triaged",
      "Triaged"
    )
  ) |>
  group_by(week_index, isTriaged, source) |>
  summarise(
    count = n(),
    mean_ttr = mean(ttr_weeks, na.rm = TRUE),
    sd_ttr = sd(ttr_weeks, na.rm = TRUE),
    # Drop all grouping so the result is a plain tibble: without this,
    # summarise() keeps the week_index/isTriaged grouping and prints a
    # regrouping message on every run.
    .groups = "drop"
  )
# Plot the weekly mean TTR (with a +/- 1 SD band) and the weekly task count,
# one panel per source x triage-status combination.
ggplot(ttr_trajectory, aes(x = week_index)) +
  # Ribbon for standard deviation; drawn first so the lines render on top of
  # the band instead of being tinted by it.
  geom_ribbon(
    aes(ymin = mean_ttr - sd_ttr, ymax = mean_ttr + sd_ttr),
    fill = "lightblue", alpha = 0.4
  ) +
  # Line for mean TTR (`linewidth` replaces the deprecated `size` for lines).
  geom_line(aes(y = mean_ttr, color = "Mean TTR"), linewidth = 1) +
  # Line for count of tasks.
  # NOTE(review): the count is drawn on the same y axis as mean TTR, while the
  # axis label below only names TTR -- consider a secondary axis or a
  # separate panel.
  geom_line(
    aes(y = count, color = "Count of New Tasks"),
    linewidth = 1, linetype = "dashed"
  ) +
  # Facet the plot by source and triaged status; each panel gets its own
  # y range.
  facet_wrap(source ~ isTriaged, scales = "free_y") +
  labs(
    title = "TTR by Source and Triage Status (TODO)",
    x = "Week Index",
    y = "Mean TTR (in weeks)",
    color = "Metrics"
  ) +
  # Keys must match the constant strings used in the color aesthetics above.
  scale_color_manual(
    values = c("Mean TTR" = "blue", "Count of New Tasks" = "red")
  ) +
  theme_minimal() +
  theme(
    strip.text = element_text(face = "bold", size = 12),
    axis.text = element_text(size = 10),
    axis.title = element_text(size = 12)
  )
dsl_df |> dsl_df |>
filter(week_index >= 4)|> filter(week_index >= 4)|>
filter(source == "c1") |> filter(source == "c1") |>

View File

@ -1,18 +0,0 @@
1. SSH tunnel from your workstation using the following command:
ssh -N -L 8787:n3439:57601 mjilg@klone.hyak.uw.edu
and point your web browser to http://localhost:8787
2. log in to RStudio Server using the following credentials:
user: mjilg
password: OmvStzwArWC2NNHj/j8p
When done using RStudio Server, terminate the job by:
1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window)
2. Issue the following command on the login node:
scancel -f 32164721
[2026-01-06T14:05:14.031] error: *** JOB 32164721 ON n3439 CANCELLED AT 2026-01-06T14:05:14 DUE TO TIME LIMIT ***