updating with revised plots and data
@ -2,24 +2,76 @@ library(tidyverse)
|
||||
# Load the unified comment/task table.
main_csv <- "~/analysis_data/120725_unified.csv"
main_df <- read.csv(main_csv, header = TRUE)

# Keep task descriptions whose `author_closer` flag equals the string "True"
# (the flag is compared as text, not as an R logical).
author_closer <- main_df |>
  filter(
    comment_type == "task_description",
    author_closer == "True"
  )

# Tabulate the retained rows by `isAuthorWMF`.
table(author_closer$isAuthorWMF)
|
||||
# Load the DSL modelling frame.
dsl_csv <- "~/dsl/120725_DSL_frame.csv"
dsl_df <- read.csv(dsl_csv, header = TRUE)

# Mean and standard deviation of time-to-resolution (in weeks) for
# "Needs Triage" tasks with week_index >= -4, per source and per
# `isAuthorWMF` value.
needs_triage <- dsl_df |>
  filter(week_index >= -4, priority == "Needs Triage") |>
  # 168 = 24 * 7; assumes TTR is recorded in hours -- TODO confirm.
  mutate(ttr_weeks = TTR / 168) |>
  group_by(source, isAuthorWMF) |>
  summarise(
    # na.rm = TRUE matches the `changes` summary below: a single missing TTR
    # would otherwise turn the whole group's statistic into NA.
    mean_ttr_weeks = mean(ttr_weeks, na.rm = TRUE),
    sd_ttr_weeks = sd(ttr_weeks, na.rm = TRUE),
    # Same grouping as the default, stated explicitly to silence the message.
    .groups = "drop_last"
  )
|
||||
|
||||
# Task count and time-to-resolution (weeks) summary for the three triage
# priorities of interest, split by source, comparison period and priority.
changes <- dsl_df |>
  filter(priority %in% c("Needs Triage", "Unbreak Now!", "High")) |>
  mutate(
    # NOTE(review): each window spans nine week_index values (-4..4 and
    # -13..-5) while the labels say "8 weeks" -- confirm the intended bounds.
    period = case_when(
      week_index >= -4 & week_index <= 4 ~ "8 weeks after announcement",
      week_index >= -13 & week_index <= -5 ~ "8 weeks before deployment announcement",
      # Typed NA keeps every branch character, which older dplyr releases
      # require, and matches the `period_counts` block in this script.
      TRUE ~ NA_character_
    ),
    # 168 = 24 * 7; assumes TTR is recorded in hours -- TODO confirm.
    ttr_weeks = TTR / 168
  ) |>
  # Drop rows that fall outside both windows.
  filter(!is.na(period)) |>
  group_by(source, period, priority) |>
  summarise(
    count = n(),
    mean_ttr_weeks = mean(ttr_weeks, na.rm = TRUE),
    sd_ttr_weeks = sd(ttr_weeks, na.rm = TRUE),
    # Same grouping as the default, stated explicitly to silence the message.
    .groups = "drop_last"
  )
|
||||
|
||||
|
||||
new_authors_summary <- main_df |>
|
||||
# new contributors
|
||||
first_task <- main_df |>
|
||||
filter(comment_type == "task_description") |>
|
||||
group_by(source, AuthorPHID) |>
|
||||
summarise(
|
||||
task_count = n(),
|
||||
first_task = min(week_index)
|
||||
) |>
|
||||
group_by(first_task, source) |>
|
||||
summarise(
|
||||
new_authors_count = n()
|
||||
first_task_week = min(week_index)
|
||||
)
|
||||
ggplot(new_authors_summary, aes(x = first_task, y = new_authors_count)) +
|
||||
|
||||
# Join the per-author `first_task` table onto every task description and flag
# tasks whose week_index equals `first_task_week`.
# NOTE(review): `first_task_week` is expected to be supplied by `first_task`
# through the join -- confirm against that table's definition.
tasks_flagged <- main_df |>
  filter(comment_type == "task_description") |>
  left_join(first_task, by = c("source", "AuthorPHID")) |>
  mutate(is_first_time_author = (week_index == first_task_week))
|
||||
|
||||
# Per week and source: number of tasks, number flagged as first-time-author
# tasks, and the share of the latter.
summary_df <- tasks_flagged |>
  group_by(week_index, source) |>
  summarise(
    total_tasks = n(),
    first_time_tasks = sum(is_first_time_author),
    proportion_first_time = first_time_tasks / total_tasks
  ) |>
  ungroup()
|
||||
|
||||
# Total first-time-author tasks per source within each comparison window:
# "recent" is week_index -4..4, "prior" is week_index -13..-5 (both inclusive).
period_counts <- summary_df |>
  mutate(
    period = case_when(
      between(week_index, -4, 4) ~ "recent",
      between(week_index, -13, -5) ~ "prior",
      TRUE ~ NA_character_
    )
  ) |>
  # Weeks outside both windows are discarded.
  filter(!is.na(period)) |>
  group_by(source, period) |>
  summarise(period_first_time_tasks = sum(first_time_tasks), .groups = "drop")
|
||||
|
||||
ggplot(summary_df, aes(x = week_index, y = first_time_tasks)) +
|
||||
facet_grid(source ~ .,
|
||||
scales = "free_y",
|
||||
labeller = labeller(source = c("c1" = "VisualEditor",
|
||||
|
||||
BIN
doc_plots/120825_dsl_coefs.png
Normal file
|
After Width: | Height: | Size: 427 KiB |
|
Before Width: | Height: | Size: 298 KiB After Width: | Height: | Size: 298 KiB |
|
Before Width: | Height: | Size: 331 KiB After Width: | Height: | Size: 328 KiB |
|
Before Width: | Height: | Size: 352 KiB After Width: | Height: | Size: 338 KiB |
15
dsl/dsl.R
@ -80,9 +80,9 @@ dev_model <- dsl(
|
||||
sample_split = 20,
|
||||
data=dsl_df
|
||||
)
|
||||
summary(dev_model)
|
||||
saveRDS(dev_model, "120725_logit_dsl.RDS")
|
||||
|
||||
#summary(dev_model)
|
||||
#saveRDS(dev_model, "120725_logit_dsl.RDS")
|
||||
dev_model <- readRDS("dsl/120725_logit_dsl.RDS")
|
||||
library(broom)
|
||||
library(dplyr)
|
||||
tidy.dsl <- function(x, conf.int = FALSE, conf.level = 0.95, exponentiate = FALSE, ...) {
|
||||
@ -138,7 +138,7 @@ coef_df <- coef_df |>
|
||||
"WMF-affiliate Author:HTTP-deprecation"
|
||||
)))
|
||||
)
|
||||
ggplot(coef_df, aes(x = estimate, y = term)) +
|
||||
dsl_coefs <- ggplot(coef_df, aes(x = estimate, y = term)) +
|
||||
geom_point(size = 1) +
|
||||
geom_errorbar(aes(xmin = estimate - 1.96*std.error, xmax = estimate + 1.96 *std.error), height = 0.2) +
|
||||
geom_vline(xintercept = 0, linetype = "dashed", color = "red") +
|
||||
@ -146,3 +146,10 @@ ggplot(coef_df, aes(x = estimate, y = term)) +
|
||||
x = "Coefficient Estimate",
|
||||
y = "Variable") +
|
||||
theme_minimal()
|
||||
# Write the coefficient plot to a PNG: 8 x 6 inches at 600 dpi.
ggsave("120825_dsl_coefs.png", plot = dsl_coefs, width = 8, height = 6, dpi = 600)
|
||||
|
||||
@ -93,15 +93,14 @@ c1_ttr_plot <- dsl_df |>
|
||||
size = 4) +
|
||||
labs(x = "Weeks from Release",
|
||||
y = "Time to Resolution (weeks)",
|
||||
fill = "Priority Tag",
|
||||
title = "VisualEditor Time to Resolution by Triage Priority") +
|
||||
fill = "Priority Tag") +
|
||||
theme(legend.position = "top")
|
||||
c1_ttr_plot
|
||||
ggsave(
|
||||
filename = "120825_c1_ttr.png",
|
||||
plot = c1_ttr_plot,
|
||||
width = 12, # inches
|
||||
height = 4, # inches
|
||||
height = 6, # inches
|
||||
dpi = 600 # high resolution
|
||||
)
|
||||
|
||||
@ -204,7 +203,6 @@ tasks_created <- ggplot(
|
||||
labs(
|
||||
x = "Weeks from Feature Deployment",
|
||||
y = "Count of Tasks Created",
|
||||
title = "Phabricator Tasks Created by Week and Author Affiliation",
|
||||
fill = "Task Author Affiliated with WMF?"
|
||||
) +
|
||||
theme(legend.position = "top")
|
||||
@ -213,7 +211,7 @@ ggsave(
|
||||
filename = "120825_tasks_created.png",
|
||||
plot = tasks_created,
|
||||
width = 12, # inches
|
||||
height = 4, # inches
|
||||
height = 6, # inches
|
||||
dpi = 600 # high resolution
|
||||
)
|
||||
|
||||
|
||||
@ -88,8 +88,7 @@ task_status_plot <- declined_summary|>
|
||||
"c3" = "HTTP-deprecation"))) +
|
||||
geom_col(position = position_dodge(width = 0.9), width = 0.8) +
|
||||
scale_fill_viridis_d(option='magma') +
|
||||
labs(title = "Task Status (as of February 28, 2025) by Week",
|
||||
x = "Weeks from feature deployment",
|
||||
labs(x = "Weeks from feature deployment",
|
||||
y = "% of items in status",
|
||||
fill = "Task Status") +
|
||||
geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.5) +
|
||||
@ -103,7 +102,7 @@ ggsave(
|
||||
filename = "120825_tasks_status.png",
|
||||
plot = task_status_plot,
|
||||
width = 12, # inches
|
||||
height = 4, # inches
|
||||
height = 6, # inches
|
||||
dpi = 600 # high resolution
|
||||
)
|
||||
|
||||
|
||||