diff --git a/analysis_data/scratch.R b/analysis_data/scratch.R index 3348bd5..27f3d2a 100644 --- a/analysis_data/scratch.R +++ b/analysis_data/scratch.R @@ -1,4 +1,5 @@ library(tidyverse) +library(dplyr) main_csv <-"~/analysis_data/121625_unified.csv" main_df <- read.csv(main_csv, header = TRUE) @@ -7,7 +8,7 @@ bz_summary <- main_df |> AuthorPHID == "PHID-USER-idceizaw6elwiwm5xshb", TRUE, FALSE )) |> group_by(source, comment_type, isBz)|> - summarise(count = n(), .groups = 'drop') + summarize(count = n(), .groups = 'drop') needs_triage <- dsl_df |> @@ -34,7 +35,7 @@ changes<- dsl_df |> filter(!is.na(period)) |> mutate(ttr_weeks = TTR/168) |> group_by(source, period, priority) %>% - summarise( + summarize( count = n(), mean_ttr_weeks = mean(ttr_weeks, na.rm = TRUE), sd_ttr_weeks = sd(ttr_weeks, na.rm = TRUE), @@ -45,7 +46,7 @@ changes<- dsl_df |> first_task <- main_df |> filter(comment_type == "task_description") |> group_by(source, AuthorPHID) |> - summarise( + summarize( task_count = n(), first_task_week = min(week_index) ) @@ -56,7 +57,14 @@ tasks_flagged <- main_df %>% mutate(is_first_time_author = week_index == first_task_week) summary_df <- tasks_flagged %>% - group_by(week_index, source) %>% + mutate( + period = case_when( + week_index >= -4 & week_index <= 4 ~ "8 weeks after announcement", + week_index >= -13 & week_index <= -5 ~ "8 weeks before deployment announcement", + TRUE ~ NA + ) + ) |> + group_by(period, source, isAuthorWMF) %>% summarize( total_tasks = n(), first_time_tasks = sum(is_first_time_author), diff --git a/dsl/scratch.R b/dsl/scratch.R index 0f91d2a..09080fd 100644 --- a/dsl/scratch.R +++ b/dsl/scratch.R @@ -3,6 +3,43 @@ library(tidyverse) dsl_csv <-"~/dsl/121625_DSL_frame.csv" dsl_df <- read.csv(dsl_csv, header = TRUE) +ttr_trajectory <- dsl_df |> + mutate(ttr_weeks = TTR_hours / 168) |> + mutate(isTriaged = if_else(priority == 'Needs Triage', + "Not Triaged", + "Triaged")) |> + group_by(week_index, isTriaged, source) |> + summarise( + count = n(), + mean_ttr = mean(ttr_weeks, na.rm = TRUE), + sd_ttr = sd(ttr_weeks, na.rm = TRUE) + ) + +ggplot(ttr_trajectory, aes(x = week_index)) + + # Line for mean TTR + geom_line(aes(y = mean_ttr, color = "Mean TTR"), size = 1) + + # Ribbon for standard deviation + geom_ribbon(aes(ymin = mean_ttr - sd_ttr, ymax = mean_ttr + sd_ttr), + fill = "lightblue", alpha = 0.4) + + # Line for count of tasks + geom_line(aes(y = count, + color = "Count of New Tasks"), size = 1, linetype = "dashed") + + # Facet the plot by source and triaged status + facet_wrap(source ~ isTriaged, scales = "free_y") + + labs( + title = "TTR by Source and Triage Status (TODO)", + x = "Week Index", + y = "Mean TTR (in weeks)", + color = "Metrics" + ) + + scale_color_manual(values = c("Mean TTR" = "blue", "Count of New Tasks" = "red")) + + theme_minimal() + + theme( + strip.text = element_text(face = "bold", size = 12), + axis.text = element_text(size = 10), + axis.title = element_text(size = 12) + ) + dsl_df |> filter(week_index >= 4)|> filter(source == "c1") |> diff --git a/mgaughan-rstudio-server_32164721.out b/mgaughan-rstudio-server_32164721.out deleted file mode 100644 index f311a90..0000000 --- a/mgaughan-rstudio-server_32164721.out +++ /dev/null @@ -1,18 +0,0 @@ -1. SSH tunnel from your workstation using the following command: - - ssh -N -L 8787:n3439:57601 mjilg@klone.hyak.uw.edu - - and point your web browser to http://localhost:8787 - -2. log in to RStudio Server using the following credentials: - - user: mjilg - password: OmvStzwArWC2NNHj/j8p - -When done using RStudio Server, terminate the job by: - -1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window) -2. Issue the following command on the login node: - - scancel -f 32164721 -[2026-01-06T14:05:14.031] error: *** JOB 32164721 ON n3439 CANCELLED AT 2026-01-06T14:05:14 DUE TO TIME LIMIT ***