diff --git a/121325_work/commit_attribution_count.R b/121325_work/commit_attribution_count.R
new file mode 100644
index 0000000..a3d7038
--- /dev/null
+++ b/121325_work/commit_attribution_count.R
@@ -0,0 +1,74 @@
+library(tidyverse)
+library(dplyr)
+library(lubridate)
+# Reference dates of the three events (c1, c2, c3) that weeks are indexed against.
+c1_event_date <- as.Date("2013-07-01")
+c2_event_date <- as.Date("2013-08-28")
+c3_event_date <- as.Date("2015-07-02")
+# Number of whole weeks from `ref_date` to `date`; floor division, so dates
+# before `ref_date` get negative indices.
+relative_week <- function(date, ref_date) {
+  as.integer(as.numeric(difftime(date, ref_date, units = "days")) %/% 7)
+}
+
+# TRUE where `text` mentions "http" without "://", or mentions "login", "ssl",
+# "tls", or "cert" (unless "certain" also appears). NA input yields FALSE.
+# `&` binds tighter than `|` in R; the added parentheses only make that explicit.
+contains_http_but_not_url <- function(text) {
+  # Handle NA values explicitly
+  ifelse(
+    is.na(text),
+    FALSE,
+    (str_detect(text, "http") & !str_detect(text, "://")) |
+      str_detect(text, "login") |
+      str_detect(text, "ssl") |
+      str_detect(text, "tls") |
+      (str_detect(text, "cert") & !str_detect(text, "certain"))
+  )
+}
+#by the time C. Scott Ananian is committing he is employed
+core_csv <-"~/121325_work/121225_vd_data/core_2010-01-01_to_2024-12-31.csv"
+core_df <- read.csv(core_csv, header = TRUE)
+known_affil_emails <- c("krinkle@fastmail.com", "roan.kattouw@gmail.com",
+  "trevorparscal@gmail.com", "krinklemail@gmail.com", "moriel@gmail.com")
+active_names<- c("Timo Tijhof", "Krinkle", "Roan Kattouw", "Catrope",
+  "Trevor Parscal", "Ed Sanders", "Moriel Schottlender", "Gabriel Wicke", "C. Scott Ananian")
+core_df <- core_df |>
+  mutate(commit_date = ymd_hms(commit_date)) |>
+  mutate(isAuthorWMF = case_when(
+    author_name %in% active_names ~ "NAMES",
+    grepl("@wikimedia\\.org", author_email, ignore.case = TRUE) ~ "TRUE",
+    grepl("@wikimedia\\.de", author_email, ignore.case = TRUE) ~ "TRUE",
+    grepl("l10n-bot@translatewiki\\.net", author_email, ignore.case = TRUE) ~ "localization",
+    grepl("@gerrit\\.wikimedia\\.org", author_email, ignore.case = TRUE) ~ "Gerrit",
+    TRUE ~ "FALSE"
+  )) |>
+  mutate(isVE = case_when(
+    grepl("VisualEditor", message, ignore.case = TRUE) ~ TRUE,
+    grepl(" VE ", message, ignore.case = TRUE) ~ TRUE,
+    TRUE ~ FALSE
+  )) |>
+  mutate(code_location = case_when(
+    grepl("login", diff_info, ignore.case = TRUE) ~ "login_in_location",
+    grepl("auth", diff_info, ignore.case = TRUE) ~ "auth_in_location",
+    grepl("security", diff_info, ignore.case = TRUE) ~ "security",
+    TRUE ~ "Other"
+  ))
+
+c1_relevant <- core_df |>
+  mutate(week_index = relative_week(commit_date, c1_event_date)) |>
+  filter(week_index >= -33 & week_index <= 13) |>
+  filter(author_email != "jenkins-bot@gerrit.wikimedia.org") |>
+  filter(isVE == TRUE)
+
+
+# commit_date is POSIXct (from ymd_hms); comparing it directly with a Date
+# raises "Incompatible methods" and compares seconds against days, so convert
+# to Date first. Both bounds are inclusive calendar days.
+total_share <- core_df |>
+  filter(as.Date(commit_date) >= as.Date('2012-11-11') & as.Date(commit_date) <= as.Date("2015-10-02"))|>
+  group_by(isAuthorWMF)|>
+  summarise(count = n(), .groups = 'drop')
+#moving 15 from NAMES to FALSE within this result for the 15 commits that C. Scott Ananian made prior to his employment
+
+
diff --git a/121325_work/location_count_aggregation.R b/121325_work/location_count_aggregation.R
index 4b679c9..d7de93f 100644
--- a/121325_work/location_count_aggregation.R
+++ b/121325_work/location_count_aggregation.R
@@ -105,15 +105,15 @@ core_commits_created <- ggplot(
   geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.5) +
   geom_text(
     data = subset(core_counts, source == "c1" & week_index == 6),
-    aes(x=week_index, y=120, label='Opt-out deployment'),
+    aes(x=week_index, y=10, label='Opt-out deployment'),
     size = 2.5) +
   geom_text(
     data = subset(core_counts, source == "c1" & week_index == -33),
-    aes(x=week_index, y=120, label='Opt-in Testing'),
+    aes(x=week_index, y=10, label='Opt-in Testing'),
     size = 2.5) +
   geom_text(
     data = subset(core_counts, source == "c2" & week_index == -12),
-    aes(x=week_index, y=20, label='Deployment Announcement'),
+    aes(x=week_index, y=10, label='Deployment Announcement'),
     size = 2.5) +
   theme_minimal() +
   scale_fill_viridis_d() +
diff --git a/121325_work/misc.R b/121325_work/misc.R
deleted file mode 100644
index 94a294b..0000000
--- a/121325_work/misc.R
+++ /dev/null
@@ -1,72 +0,0 @@
-library(tidyverse)
-library(dplyr)
-library(lubridate)
-c1_event_date <- as.Date("2013-07-01")
-c2_event_date <- as.Date("2013-08-28")
-c3_event_date <- as.Date("2015-07-02")
-relative_week <- function(date, ref_date) {
-  as.integer(as.numeric(difftime(date, ref_date, units = "days")) %/% 7)
-}
-
-core_csv <-"~/121325_work/121225_vd_data/extension_VisualEditor_2000-01-01_to_2016-12-31.csv"
-core_df <- read.csv(core_csv, header = TRUE)
-known_affil_emails <- c("krinkle@fastmail.com", "roan.kattouw@gmail.com",
-  "trevorparscal@gmail.com", "krinklemail@gmail.com", "moriel@gmail.com")
-active_names<- c("Timo Tijhof", "Krinkle", "Roan Kattouw", "Catrope",
-  "Trevor Parscal", "Ed Sanders")
-core_df <- core_df |>
-  mutate(commit_date = ymd_hms(commit_date)) |>
-  mutate(isAuthorWMF = case_when(
-    author_name %in% active_names ~ "FIVE",
-    grepl("@wikimedia\\.org", author_email, ignore.case = TRUE) ~ "TRUE",
-    grepl("@wikimedia\\.de", author_email, ignore.case = TRUE) ~ "TRUE",
-    grepl("l10n-bot@translatewiki\\.net", author_email, ignore.case = TRUE) ~ "localization",
-    grepl("@gerrit\\.wikimedia\\.org", author_email, ignore.case = TRUE) ~ "Gerrit",
-    TRUE ~ "FALSE"
-  )) |>
-  mutate(isVE = case_when(
-    grepl("VisualEditor", message, ignore.case = TRUE) ~ TRUE,
-    grepl(" VE ", message, ignore.case = TRUE) ~ TRUE,
-    TRUE ~ FALSE
-  ))
-
-c1_core_weekly <- core_df |>
-  mutate(week_index = relative_week(commit_date, c1_event_date)) |>
-  group_by(week_index, isAuthorWMF)|>
-  summarise(count = n(), .groups = 'drop')|>
-  filter(week_index >= -9 & week_index < -4) |>
-  mutate(source = 'c1')
-c1summary <- c1_core_weekly |>
-  group_by(isAuthorWMF)|>
-  summarize(total = sum(count))
-
-
-c2_core_weekly <- core_df |>
-  mutate(week_index = relative_week(commit_date, c2_event_date)) |>
-  group_by(week_index, isAuthorWMF)|>
-  summarise(count = n(), .groups = 'drop')|>
-  filter(week_index >= -104 & week_index <= 13) |>
-  mutate(source = 'c2')
-
-c3_core_weekly <- core_df |>
-  mutate(week_index = relative_week(commit_date, c3_event_date)) |>
-  group_by(week_index, isAuthorWMF)|>
-  summarise(count = n(), .groups = 'drop')|>
-  filter(week_index >= -83 & week_index <= 13) |>
-  mutate(source = 'c3')
-#collate and save
-core_weekly <- rbind(c1_core_weekly, c2_core_weekly, c3_core_weekly)
-
-c1summary <- c1_core_weekly |>
-  group_by(isAuthorWMF)|>
-  summarize(total = sum(count))
-
-c2summary <- c2_core_weekly |>
-  group_by(isAuthorWMF)|>
-  summarize(total = sum(count))
-
-c3summary <- c3_core_weekly |>
-  group_by(isAuthorWMF)|>
-  summarize(total = sum(count))
-
-
diff --git a/121325_work/newContrib_count_aggregation.R b/121325_work/newContrib_count_aggregation.R
index c584056..2402dbe 100644
--- a/121325_work/newContrib_count_aggregation.R
+++ b/121325_work/newContrib_count_aggregation.R
@@ -36,7 +36,7 @@ ve_counts <- ve_df |>
 ve_counts <- ve_counts |>
   filter(isAuthorWMF != "Gerrit")|>
   filter(source == 'c1')
-
+library(ggplot2)
 ve_new_commits_created <- ggplot(
   ve_counts,
   aes(
diff --git a/mgaughan-rstudio-server_32164721.out b/mgaughan-rstudio-server_32164721.out
new file mode 100644
index 0000000..f311a90
--- /dev/null
+++ b/mgaughan-rstudio-server_32164721.out
@@ -0,0 +1,18 @@
+1. SSH tunnel from your workstation using the following command:
+
+   ssh -N -L 8787:n3439:57601 mjilg@klone.hyak.uw.edu
+
+   and point your web browser to http://localhost:8787
+
+2. log in to RStudio Server using the following credentials:
+
+   user: mjilg
+   password: <REDACTED>
+
+When done using RStudio Server, terminate the job by:
+
+1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window)
+2. Issue the following command on the login node:
+
+   scancel -f 32164721
+[2026-01-06T14:05:14.031] error: *** JOB 32164721 ON n3439 CANCELLED AT 2026-01-06T14:05:14 DUE TO TIME LIMIT ***