# Weekly commit counts in MediaWiki core and the VisualEditor extension,
# aligned to the event date below (case "c1"), restricted to authors who
# committed to both repositories, and plotted by author affiliation.

library(tidyverse)
library(dplyr)
library(lubridate)
library(tidyr)

#' Whole weeks between a timestamp and a reference date
#'
#' @param date Date or date-time vector (anything `difftime()` accepts).
#' @param ref_date Reference date; week 0 begins at this instant.
#' @return Integer vector holding the floor of (days elapsed / 7). Values
#'   before `ref_date` are negative, e.g. the week just before it is -1.
relative_week <- function(date, ref_date) {
  as.integer(as.numeric(difftime(date, ref_date, units = "days")) %/% 7)
}

# Load data ----

data_dir <- "~/121325_work/121225_vd_data"

core_csv <- file.path(data_dir, "core_2010-01-01_to_2024-12-31.csv")
core_df <- read.csv(core_csv, header = TRUE) |>
  mutate(repo = "core")

ve_csv <- file.path(
  data_dir,
  "extension_VisualEditor_2000-01-01_to_2016-12-31.csv"
)
ve_df <- read.csv(ve_csv, header = TRUE) |>
  mutate(repo = "ve")

# rbind() is kept on purpose: it errors if the two exports do not share the
# same columns instead of silently padding with NA.
joint_df <- rbind(core_df, ve_df)

# Most active VisualEditor author emails in the study window ----

# NOTE(review): the name says "four" but seven rows are kept.
# commit_date is still raw text here, so it is converted explicitly before
# being compared against the window bounds (both bounds inclusive).
top_four_emails <- ve_df |>
  filter(
    as.Date(commit_date) >= as.Date("2012-11-11"),
    as.Date(commit_date) <= as.Date("2013-09-29")
  ) |>
  group_by(author_email) |>
  summarise(email_count = n(), .groups = "drop") |>
  arrange(desc(email_count)) |>
  slice_head(n = 7)

# Affiliation tagging ----

# Personal addresses that are treated as WMF-affiliated.
known_affil_emails <- c(
  "krinkle@fastmail.com",
  "roan.kattouw@gmail.com",
  "trevorparscal@gmail.com",
  "krinklemail@gmail.com",
  "moriel@gmail.com"
)

# Only referenced by the commented-out "ActiveEmails" variants below.
active_names <- c(
  "Timo Tijhof",
  "Krinkle",
  "Roan Kattouw",
  "Catrope",
  "Trevor Parscal",
  "Ed Sanders"
)

# jforrester@wikimedia.org
# (author_name %in% active_names) ~ "ActiveEmails",

# isAuthorWMF is a character label, not a logical: "TRUE", "FALSE",
# "localization" (translatewiki bot) or "Gerrit". case_when() takes the
# first matching rule, so the order of the rules matters.
joint_df <- joint_df |>
  mutate(
    commit_date = ymd_hms(commit_date),
    isAuthorWMF = case_when(
      author_email %in% known_affil_emails ~ "TRUE",
      grepl(
        "l10n-bot@translatewiki\\.net", author_email,
        ignore.case = TRUE
      ) ~ "localization",
      grepl("@wikimedia\\.org", author_email, ignore.case = TRUE) ~ "TRUE",
      grepl("@wikimedia\\.de", author_email, ignore.case = TRUE) ~ "TRUE",
      grepl(
        "@gerrit\\.wikimedia\\.org", author_email,
        ignore.case = TRUE
      ) ~ "Gerrit",
      TRUE ~ "FALSE"
    )
  )

# Keep only authors with at least one commit in each repository ----

authors_in_both <- joint_df |>
  group_by(author_email, repo) |>
  summarise(commit_count = n(), .groups = "drop") |>
  pivot_wider(
    names_from = repo,
    values_from = commit_count,
    values_fill = 0
  ) |>
  filter(core > 0 & ve > 0) |>
  pull(author_email)

joint_df_filtered <- joint_df |>
  filter(author_email %in% authors_in_both)

# Weekly counts relative to the event date ----

c1_event_date <- as.Date("2013-07-01")
# Inclusive range of week indices kept for the plot.
c1_week_window <- c(-33L, 13L)

# The window filter runs before aggregation; the resulting rows are the same
# as filtering afterwards, but fewer groups are built.
c1_core_weekly <- joint_df_filtered |>
  mutate(week_index = relative_week(commit_date, c1_event_date)) |>
  filter(
    week_index >= c1_week_window[1],
    week_index <= c1_week_window[2]
  ) |>
  group_by(week_index, isAuthorWMF, repo) |>
  summarise(count = n(), .groups = "drop") |>
  mutate(source = "c1")

# Bot ("localization") and "Gerrit" rows are excluded from the plot.
counts <- c1_core_weekly |>
  filter(
    !isAuthorWMF %in% c("Gerrit", "localization"),
    source == "c1"
  )

# counts <- c1_core_weekly |>
#   filter(isAuthorWMF == "ActiveEmails") |>
#   filter(source == 'c1')

# Plot ----

# Text annotations and the week / height at which each one is drawn.
milestone_labels <- tibble(
  week_index = c(3L, -27L, -4L),
  y = 100,
  label = c(
    "Opt-out deployment",
    "Opt-in Testing",
    "Deployment Announcement"
  )
)

# One label per (repo, week) that actually has data. Drawing the label once
# per affiliation row would stack identical text on top of itself.
milestone_text <- counts |>
  distinct(repo, week_index) |>
  inner_join(milestone_labels, by = "week_index")

commits_created <- ggplot(
  counts,
  aes(x = week_index, y = count, fill = isAuthorWMF)
) +
  # Strip labels are the raw repo values ("core", "ve"). A labeller keyed on
  # any other variable (e.g. source) is ignored when faceting by repo.
  facet_grid(repo ~ ., scales = "free_y") +
  geom_col(position = position_dodge(width = 0.9), width = 0.8) +
  # Reference lines use a fixed xintercept so each is drawn once per panel
  # rather than once per data row.
  geom_vline(
    xintercept = c(-29, -9),
    linetype = "dotted", color = "black", linewidth = 0.5
  ) +
  geom_vline(
    xintercept = -4,
    linetype = "3313", color = "black", linewidth = 0.5
  ) +
  geom_vline(
    xintercept = 0,
    linetype = "dashed", color = "black", linewidth = 0.5
  ) +
  # inherit.aes = FALSE: the annotation data has no isAuthorWMF column, so
  # the plot-level fill mapping must not be applied to this layer.
  geom_text(
    data = milestone_text,
    aes(x = week_index, y = y, label = label),
    inherit.aes = FALSE,
    size = 2.5
  ) +
  theme_minimal() +
  scale_fill_viridis_d() +
  labs(
    x = "Weeks from Feature Deployment",
    y = "Weekly count of new Commits",
    fill = "Commit Author Affiliated with WMF?"
  ) +
  theme(legend.position = "top")

commits_created