library(tidyverse)
library(dplyr)
library(lubridate)
library(stringr)

# Event dates ----
# Reference dates for the three cases; the plot's facet labels name them
# "VisualEditor" (c1), "HTTPS-login" (c2) and "HTTP-deprecation" (c3).
c1_event_date <- as.Date("2013-07-01")
c2_event_date <- as.Date("2013-08-28")
c3_event_date <- as.Date("2015-07-02")

# Helpers ----

#' Whole weeks between `date` and `ref_date`.
#'
#' @param date Vector of dates / date-times.
#' @param ref_date Reference date that defines week 0.
#' @return Integer vector: floor of the day difference divided by 7, so dates
#'   before `ref_date` get negative indices (the day before is week -1).
relative_week <- function(date, ref_date) {
  days_from_ref <- as.numeric(difftime(date, ref_date, units = "days"))
  as.integer(days_from_ref %/% 7)
}

#' Flag text that mentions HTTP (outside a URL) or a login/TLS-related term.
#'
#' Despite the name, a string is flagged when ANY of these holds:
#'   * it contains "http" but no "://",
#'   * it contains "login", "ssl" or "tls",
#'   * it contains "cert" but not "certain".
#' Matching is case-sensitive.
#'
#' @param text Character vector.
#' @return Logical vector the same length as `text`; `NA` input gives `FALSE`.
contains_http_but_not_url <- function(text) {
  # Handle NA values explicitly
  ifelse(
    is.na(text),
    FALSE,
    # `&` binds tighter than `|` in R; the parentheses only make that explicit.
    (str_detect(text, "http") & !str_detect(text, "://")) |
      str_detect(text, "login") |
      str_detect(text, "ssl") |
      str_detect(text, "tls") |
      (str_detect(text, "cert") & !str_detect(text, "certain"))
  )
}

#' Count commits per week relative to an event date.
#'
#' @param df Commit data with `commit_date`, `isAuthorWMF` and `code_location`.
#' @param event_date Date that defines week 0.
#' @param min_week,max_week Inclusive bounds on the week index to keep.
#' @param source_label Value stored in the `source` column of the result.
#' @return One row per (week_index, isAuthorWMF, code_location) with `count`
#'   and `source` columns.
weekly_counts <- function(df, event_date, min_week, max_week, source_label) {
  df |>
    mutate(week_index = relative_week(commit_date, event_date)) |>
    # Filtering first is equivalent to filtering the summary (week_index is a
    # grouping key) and avoids aggregating weeks that are discarded anyway.
    filter(week_index >= min_week & week_index <= max_week) |>
    count(week_index, isAuthorWMF, code_location, name = "count") |>
    mutate(source = source_label)
}

# Load and label commits ----
# get count data for the repositories
# core
core_csv <- "~/121325_work/121225_vd_data/core_2010-01-01_to_2024-12-31.csv"
core_df <- read.csv(core_csv, header = TRUE) |>
  mutate(
    commit_date = ymd_hms(commit_date),
    # First matching keyword in the diff wins; order matters.
    code_location = case_when(
      grepl("login", diff_info, ignore.case = TRUE) ~ "login_in_location",
      grepl("auth", diff_info, ignore.case = TRUE) ~ "auth_in_location",
      grepl("security", diff_info, ignore.case = TRUE) ~ "security",
      TRUE ~ "Other"
    ),
    # Kept as character ("TRUE"/"FALSE"/"Gerrit"), not logical, because of the
    # third category.
    isAuthorWMF = case_when(
      grepl("krinkle@fastmail\\.com", author_email, ignore.case = TRUE) ~ "TRUE",
      grepl("@wikimedia\\.org", author_email, ignore.case = TRUE) ~ "TRUE",
      grepl("@wikimedia\\.de", author_email, ignore.case = TRUE) ~ "TRUE",
      grepl("@gerrit\\.wikimedia\\.org", author_email, ignore.case = TRUE) ~ "Gerrit",
      TRUE ~ "FALSE"
    )
  )

# Weekly counts per event ----
c1_core_weekly <- weekly_counts(core_df, c1_event_date, -33, 13, "c1")
c2_core_weekly <- weekly_counts(core_df, c2_event_date, -104, 13, "c2")
c3_core_weekly <- weekly_counts(core_df, c3_event_date, -83, 13, "c3")

# Collate the three windows; drop Gerrit-authored commits and commits whose
# diff matched none of the keywords. (Nothing is written to disk here.)
core_counts <- bind_rows(c1_core_weekly, c2_core_weekly, c3_core_weekly) |>
  filter(isAuthorWMF != "Gerrit", code_location != "Other")

# Plot annotations ----
# One row per annotation, so each line/label is drawn exactly once in its
# facet instead of once per matching data row. Restricted to sources that are
# present in the data so the annotations never create an otherwise empty facet.
# NOTE(review): the c3 marker at week -97 lies outside the c3 window
# (-83..13) -- confirm the intended week.
milestone_lines <- tribble(
  ~source, ~xintercept, ~linetype,
  "c1",    -29,         "dotted",
  "c1",    -9,          "dotted",
  "c1",    -4,          "3313",
  "c2",    -99,         "dotted",
  "c2",    -4,          "3313",
  "c3",    -97,         "dotted",
  "c3",    -3,          "3313"
) |>
  filter(source %in% core_counts$source)

milestone_labels <- tribble(
  ~source, ~week_index, ~y,  ~label,
  "c1",    6,           120, "Opt-out deployment",
  "c1",    -33,         120, "Opt-in \nTesting",
  "c2",    -12,         20,  "Deployment Announcement"
) |>
  filter(source %in% core_counts$source)

# Plot ----
core_commits_created <- ggplot(
  core_counts,
  aes(x = week_index, y = count)
) +
  facet_grid(
    source ~ .,
    scales = "free_y",
    labeller = labeller(
      source = c(
        "c1" = "VisualEditor",
        "c2" = "HTTPS-login",
        "c3" = "HTTP-deprecation"
      )
    )
  ) +
  # `fill` was never mapped, which left scale_fill_viridis_d() and the fill
  # legend title unused and gave position_dodge() nothing to dodge on.
  # NOTE(review): rows that differ only in code_location still share a dodge
  # slot and overplot -- confirm whether they should be summed or stacked.
  geom_col(
    aes(fill = isAuthorWMF),
    position = position_dodge(width = 0.9),
    width = 0.8
  ) +
  geom_vline(
    data = milestone_lines,
    aes(xintercept = xintercept, linetype = linetype),
    color = "black",
    linewidth = 0.5
  ) +
  # Use the linetype strings as-is; the identity scale draws no legend.
  scale_linetype_identity() +
  geom_vline(
    xintercept = 0,
    linetype = "dashed",
    color = "black",
    linewidth = 0.5
  ) +
  geom_text(
    data = milestone_labels,
    aes(x = week_index, y = y, label = label),
    size = 2.5,
    inherit.aes = FALSE
  ) +
  theme_minimal() +
  scale_fill_viridis_d() +
  labs(
    x = "Weeks from Feature Deployment",
    y = "Count of mediawiki/core commits Created",
    fill = "Commit Author Affiliated with WMF?"
  ) +
  theme(legend.position = "top")

core_commits_created