67 lines
2.6 KiB
R
67 lines
2.6 KiB
R
library(tidyverse)
|
|
library(dplyr)
|
|
library(lubridate)
|
|
# Reference dates for the three events (c1, c2, c3) used by this script.
# Week indices are measured relative to these dates (see relative_week()).
c1_event_date <- as.Date("2013-07-01", format = "%Y-%m-%d")
c2_event_date <- as.Date("2013-08-28", format = "%Y-%m-%d")
c3_event_date <- as.Date("2015-07-02", format = "%Y-%m-%d")
|
|
#' Whole-week offset of a date from a reference date
#'
#' Uses floor division, so dates before `ref_date` give negative indices
#' (the day before the reference is week -1, the reference day is week 0).
#'
#' @param date Date or date-time vector to index.
#' @param ref_date Reference date defining the start of week 0.
#' @return Integer vector of week offsets, one per element of `date`.
relative_week <- function(date, ref_date) {
  elapsed_days <- as.numeric(difftime(date, ref_date, units = "days"))
  whole_weeks <- elapsed_days %/% 7
  as.integer(whole_weeks)
}
|
|
|
|
#' Flag text that mentions "http" without "://", or a security keyword
#'
#' Despite the name, an element is flagged when ANY of these holds:
#'   * it contains "http" and does not contain "://";
#'   * it contains "login", "ssl", or "tls";
#'   * it contains "cert" and does not contain "certain".
#' Patterns are passed to `str_detect()` as-is, so matching is case-sensitive.
#'
#' @param text Character vector to scan.
#' @return Logical vector shaped like `text`; `NA` inputs yield `FALSE`.
contains_http_but_not_url <- function(text) {
  # Small local shorthand so each clause below reads as a predicate.
  has <- function(pattern) str_detect(text, pattern)

  # `&` binds tighter than `|`; the parentheses only make that explicit.
  ifelse(
    is.na(text),
    FALSE,
    (has("http") & !has("://")) |
      has("login") |
      has("ssl") |
      has("tls") |
      (has("cert") & !has("certain"))
  )
}
|
|
# By the time C. Scott Ananian is committing, he is employed.
|
|
# Load the commit export; the path is hard-coded to the author's home directory.
core_csv <- "~/121325_work/121225_vd_data/core_2010-01-01_to_2024-12-31.csv"
core_df <- read.csv(file = core_csv, header = TRUE)
|
|
# E-mail addresses of known affiliated authors.
# NOTE(review): this vector is not referenced anywhere else in the visible
# script -- confirm whether it was meant to feed the isAuthorWMF labelling.
known_affil_emails <- c(
  "krinkle@fastmail.com",
  "roan.kattouw@gmail.com",
  "trevorparscal@gmail.com",
  "krinklemail@gmail.com",
  "moriel@gmail.com"
)

# Author names that are labelled "NAMES" in isAuthorWMF, matched exactly
# against author_name.
active_names <- c(
  "Timo Tijhof",
  "Krinkle",
  "Roan Kattouw",
  "Catrope",
  "Trevor Parscal",
  "Ed Sanders",
  "Moriel Schottlender",
  "Gabriel Wicke",
  "C. Scott Ananian"
)
|
|
# Enrich the commit table in one pass:
#   commit_date   - parsed from text into a date-time with ymd_hms()
#   isAuthorWMF   - character label; the first matching rule wins
#   isVE          - TRUE when the commit message mentions VisualEditor / " VE "
#   code_location - keyword bucket derived from diff_info; first match wins
core_df <- core_df |>
  mutate(
    commit_date = ymd_hms(commit_date),

    # The name rule is checked first, so a listed name is "NAMES" even when
    # the e-mail would also match one of the domain rules below.
    isAuthorWMF = case_when(
      author_name %in% active_names ~ "NAMES",
      grepl("@wikimedia\\.org", author_email, ignore.case = TRUE) ~ "TRUE",
      grepl("@wikimedia\\.de", author_email, ignore.case = TRUE) ~ "TRUE",
      grepl("l10n-bot@translatewiki\\.net", author_email, ignore.case = TRUE) ~
        "localization",
      grepl("@gerrit\\.wikimedia\\.org", author_email, ignore.case = TRUE) ~
        "Gerrit",
      TRUE ~ "FALSE"
    ),

    # grepl() never returns NA, so OR-ing the two matches gives a plain
    # TRUE/FALSE column. " VE " requires a space on both sides.
    isVE = grepl("VisualEditor", message, ignore.case = TRUE) |
      grepl(" VE ", message, ignore.case = TRUE),

    # Substring matches, case-insensitive. NOTE(review): "auth" also matches
    # longer words such as "author" -- confirm that is acceptable.
    code_location = case_when(
      grepl("login", diff_info, ignore.case = TRUE) ~ "login_in_location",
      grepl("auth", diff_info, ignore.case = TRUE) ~ "auth_in_location",
      grepl("security", diff_info, ignore.case = TRUE) ~ "security",
      TRUE ~ "Other"
    )
  )
|
|
|
|
# VisualEditor-flagged commits from 33 weeks before to 13 weeks after the
# c1 event date, excluding the jenkins-bot Gerrit account.
c1_relevant <- core_df |>
  mutate(week_index = relative_week(commit_date, c1_event_date)) |>
  filter(
    week_index >= -33,
    week_index <= 13,
    author_email != "jenkins-bot@gerrit.wikimedia.org",
    isVE
  )
|
|
|
|
|
|
# Commit counts per isAuthorWMF label for commits dated from 2012-11-11
# through 2015-10-02.
#
# commit_date is a POSIXct (it is parsed with ymd_hms()), so the window bounds
# are built as POSIXct as well. Comparing a POSIXct against a Date finds two
# different S3 Ops methods; R then warns and compares the raw numbers
# (seconds since the epoch vs. days since the epoch), which makes the upper
# bound reject every row.
# Both bounds are midnight UTC, the instant the original Date values denote,
# so commits later on 2015-10-02 remain outside the window.
total_share <- core_df |>
  filter(
    commit_date >= as.POSIXct("2012-11-11", tz = "UTC"),
    commit_date <= as.POSIXct("2015-10-02", tz = "UTC")
  ) |>
  group_by(isAuthorWMF) |>
  summarise(count = n(), .groups = "drop")

# Manual adjustment to this result (not performed by the code above): move 15
# from "NAMES" to "FALSE" for the 15 commits that C. Scott Ananian made prior
# to his employment.
|
|
|
|
|