# 87 lines, 2.5 KiB, R
library(tidyverse)

# Location of the unified analysis export.
main_csv <- "~/analysis_data/120725_unified.csv"

# Load the export into a base data.frame; the first row supplies the
# column names.
main_df <- utils::read.csv(file = main_csv, header = TRUE)
# Location of the DSL frame export.
dsl_csv <- "~/dsl/120725_DSL_frame.csv"

# Load the DSL frame into a base data.frame; the first row supplies the
# column names.
dsl_df <- utils::read.csv(file = dsl_csv, header = TRUE)
# Mean and standard deviation of time-to-resolution, in weeks, for
# "Needs Triage" tasks with week_index >= -4, broken down by source and
# isAuthorWMF.
# NOTE(review): TTR / 168 presumably converts hours to weeks (168 = 24 * 7);
# confirm the unit of TTR.
# NOTE(review): only a lower bound is applied to week_index here, while the
# `changes` summary caps its later window at week 4 -- confirm this is
# intended.
needs_triage <- dsl_df |>
  filter(week_index >= -4, priority == "Needs Triage") |>
  mutate(ttr_weeks = TTR / 168) |>
  group_by(source, isAuthorWMF) |>
  summarise(
    # na.rm = TRUE (as in the `changes` summary) so a single missing TTR
    # does not turn a whole group's statistics into NA.
    mean_ttr_weeks = mean(ttr_weeks, na.rm = TRUE),
    sd_ttr_weeks = sd(ttr_weeks, na.rm = TRUE),
    # Return a plain, ungrouped table instead of one that is still
    # implicitly grouped by `source`.
    .groups = "drop"
  )
# Task count and time-to-resolution (mean and sd, in weeks) for the three
# priorities of interest, compared across two week_index windows and broken
# down by source, period and priority. Rows outside both windows are
# dropped.
# NOTE(review): TTR / 168 presumably converts hours to weeks (168 = 24 * 7);
# confirm the unit of TTR.
# NOTE(review): if week_index is integer-valued, each window covers nine
# values (-4..4 and -13..-5), and the "after" window starts at week -4;
# confirm the "8 weeks" labels describe the intended ranges.
changes <- dsl_df |>
  filter(priority %in% c("Needs Triage", "Unbreak Now!", "High")) |>
  mutate(
    period = case_when(
      week_index >= -4 & week_index <= 4 ~ "8 weeks after announcement",
      week_index >= -13 & week_index <= -5 ~
        "8 weeks before deployment announcement",
      # Typed NA keeps every branch character; a bare logical NA is
      # rejected by dplyr releases before 1.1.
      TRUE ~ NA_character_
    ),
    ttr_weeks = TTR / 168
  ) |>
  filter(!is.na(period)) |>
  group_by(source, period, priority) |>
  summarise(
    count = n(),
    mean_ttr_weeks = mean(ttr_weeks, na.rm = TRUE),
    sd_ttr_weeks = sd(ttr_weeks, na.rm = TRUE),
    # Return a plain, ungrouped table instead of one that is still
    # implicitly grouped by `source` and `period`.
    .groups = "drop"
  )
# New contributors ----

# Per-author lookup table: one row per (source, AuthorPHID) holding the
# number of task-description rows that author has and the smallest
# week_index among them.
first_task <- main_df |>
  filter(comment_type == "task_description") |>
  group_by(source, AuthorPHID) |>
  summarise(
    task_count = n(),
    first_task_week = min(week_index),
    # Drop all grouping: the table is used as a join lookup, so it should
    # not stay implicitly grouped by `source`.
    .groups = "drop"
  )
# Attach each author's per-source summary from `first_task` to every
# task-description row, then flag the rows whose week_index equals that
# author's earliest week. Every task an author has in that earliest week is
# flagged, not only a single one.
tasks_flagged <- left_join(
  filter(main_df, comment_type == "task_description"),
  first_task,
  by = c("source", "AuthorPHID")
) |>
  mutate(is_first_time_author = (week_index == first_task_week))
# Weekly totals per source: the number of task rows, how many of them were
# flagged as coming from an author's earliest week, and the flagged share.
summary_df <- tasks_flagged |>
  group_by(week_index, source) |>
  summarise(
    total_tasks = n(),
    first_time_tasks = sum(is_first_time_author),
    # Built from the two columns computed just above.
    proportion_first_time = first_time_tasks / total_tasks
  ) |>
  # Remove whatever grouping summarise() leaves behind.
  ungroup()
# Collapse the weekly table into two windows per source and total the
# flagged tasks in each: "recent" is week_index -4..4, "prior" is
# week_index -13..-5. Weeks outside both windows get no label and are
# discarded.
period_counts <- summary_df |>
  mutate(
    period = case_when(
      week_index >= -4 & week_index <= 4 ~ "recent",
      week_index >= -13 & week_index <= -5 ~ "prior",
      TRUE ~ NA_character_
    )
  ) |>
  drop_na(period) |>
  group_by(source, period) |>
  summarise(
    period_first_time_tasks = sum(first_time_tasks),
    .groups = "drop"
  )
# Bar chart of first_time_tasks per week_index, one panel row per source,
# each with its own y scale. The labeller maps the source codes c1/c2/c3 to
# display names.
# NOTE(review): the x axis is labelled "Date of first task" but is mapped to
# week_index -- confirm the label.
ggplot(
  data = summary_df,
  mapping = aes(x = week_index, y = first_time_tasks)
) +
  geom_col() +
  facet_grid(
    source ~ .,
    scales = "free_y",
    labeller = labeller(
      source = c(
        "c1" = "VisualEditor",
        "c2" = "HTTPS-login",
        "c3" = "HTTP-deprecation"
      )
    )
  ) +
  labs(
    title = "Task count by Author's first task date",
    x = "Date of first task",
    y = "Number of tasks created"
  ) +
  theme_minimal()