library(tidyverse)

# Summarises task time-to-resolution (TTR) around week_index 0 and counts
# tasks filed by first-time authors per week, then plots the latter.
# NOTE(review): the period labels below suggest week_index is measured
# relative to a deployment announcement -- confirm against the data source.

# Input data ----
main_csv <- "~/analysis_data/120725_unified.csv"
main_df <- read.csv(main_csv, header = TRUE)

dsl_csv <- "~/dsl/120725_DSL_frame.csv"
dsl_df <- read.csv(dsl_csv, header = TRUE)

# 168 = 24 * 7. Dividing TTR by it yields weeks, presumably because TTR is
# recorded in hours -- TODO confirm.
hours_per_week <- 168

# TTR of "Needs Triage" tasks ----
# Mean and SD of TTR (in weeks) per source and isAuthorWMF, for tasks with
# week_index >= -4. Missing TTR values are ignored, matching `changes` below,
# so one NA does not blank out a whole group's statistics.
needs_triage <- dsl_df |>
  filter(week_index >= -4, priority == "Needs Triage") |>
  mutate(ttr_weeks = TTR / hours_per_week) |>
  group_by(source, isAuthorWMF) |>
  summarise(
    mean_ttr_weeks = mean(ttr_weeks, na.rm = TRUE),
    sd_ttr_weeks = sd(ttr_weeks, na.rm = TRUE),
    .groups = "drop"
  )

# TTR by period and priority ----
# Task count plus mean and SD of TTR (in weeks) per source, period and
# priority, restricted to three priority levels. Rows outside both
# week_index windows are dropped.
# NOTE(review): each window spans nine week_index values (-13..-5 and -4..4)
# and the "after" window starts at -4, while the labels say "8 weeks" --
# confirm the intended boundaries. Labels are left unchanged here.
changes <- dsl_df |>
  filter(priority %in% c("Needs Triage", "Unbreak Now!", "High")) |>
  mutate(
    period = case_when(
      week_index >= -4 & week_index <= 4 ~ "8 weeks after announcement",
      week_index >= -13 & week_index <= -5 ~
        "8 weeks before deployment announcement",
      # Typed NA keeps case_when() type-stable on every dplyr version.
      TRUE ~ NA_character_
    )
  ) |>
  filter(!is.na(period)) |>
  mutate(ttr_weeks = TTR / hours_per_week) |>
  group_by(source, period, priority) |>
  summarise(
    count = n(),
    mean_ttr_weeks = mean(ttr_weeks, na.rm = TRUE),
    sd_ttr_weeks = sd(ttr_weeks, na.rm = TRUE),
    .groups = "drop"
  )

# New contributors ----
# Per (source, AuthorPHID): number of task descriptions and the earliest
# week_index in which that author filed one.
first_task <- main_df |>
  filter(comment_type == "task_description") |>
  group_by(source, AuthorPHID) |>
  summarise(
    task_count = n(),
    first_task_week = min(week_index),
    .groups = "drop"
  )

# Flag each task whose week_index equals its author's first task week. Every
# task an author files during that first week is flagged, not only the
# earliest one.
tasks_flagged <- main_df |>
  filter(comment_type == "task_description") |>
  left_join(first_task, by = c("source", "AuthorPHID")) |>
  mutate(is_first_time_author = week_index == first_task_week)

# Weekly totals per source: all tasks, flagged tasks, and their ratio.
summary_df <- tasks_flagged |>
  group_by(week_index, source) |>
  summarise(
    total_tasks = n(),
    first_time_tasks = sum(is_first_time_author),
    proportion_first_time = first_time_tasks / total_tasks,
    .groups = "drop"
  )

# Flagged-task totals per source for the same two week_index windows used in
# `changes`, labelled "recent" (-4..4) and "prior" (-13..-5).
period_counts <- summary_df |>
  mutate(
    period = case_when(
      week_index >= -4 & week_index <= 4 ~ "recent",
      week_index >= -13 & week_index <= -5 ~ "prior",
      TRUE ~ NA_character_
    )
  ) |>
  filter(!is.na(period)) |>
  group_by(source, period) |>
  summarise(
    period_first_time_tasks = sum(first_time_tasks),
    .groups = "drop"
  )

# Plot ----
# Weekly count of flagged tasks, one facet row per source with its own
# y scale. Source codes are mapped to display names via the labeller.
ggplot(summary_df, aes(x = week_index, y = first_time_tasks)) +
  facet_grid(
    source ~ .,
    scales = "free_y",
    labeller = labeller(
      source = c(
        "c1" = "VisualEditor",
        "c2" = "HTTPS-login",
        "c3" = "HTTP-deprecation"
      )
    )
  ) +
  geom_col() +
  labs(
    x = "Date of first task",
    y = "Number of tasks created",
    title = "Task count by Author's first task date"
  ) +
  theme_minimal()