updating with some descriptive stats for the commit data
This commit is contained in:
parent
f073136e09
commit
c1dd41b095
66
121325_work/commit_attribution_count.R
Normal file
66
121325_work/commit_attribution_count.R
Normal file
@ -0,0 +1,66 @@
|
||||
library(tidyverse)
|
||||
library(dplyr)
|
||||
library(lubridate)
|
||||
c1_event_date <- as.Date("2013-07-01")
|
||||
c2_event_date <- as.Date("2013-08-28")
|
||||
c3_event_date <- as.Date("2015-07-02")
|
||||
# Whole-week offset of `date` from `ref_date`.
#
# The elapsed time is measured in days and floor-divided by 7, so dates in the
# seven days starting at `ref_date` map to 0 and dates in the seven days before
# it map to -1. Vectorised over `date`; returns an integer vector.
relative_week <- function(date, ref_date) {
  days_elapsed <- as.numeric(difftime(date, ref_date, units = "days"))
  whole_weeks <- days_elapsed %/% 7
  as.integer(whole_weeks)
}
|
||||
|
||||
# Flag text that mentions selected web/security keywords.
#
# An element is TRUE when any of the following holds:
#   * it contains "http" but not "://" (i.e. "http" outside a full URL),
#   * it contains "login", "ssl" or "tls",
#   * it contains "cert" but not "certain".
# NA elements yield FALSE. Note the function flags more than its name suggests.
# Matching is case-sensitive, as with str_detect()'s default.
contains_http_but_not_url <- function(text) {
  has <- function(pattern) str_detect(text, pattern)

  # `&` binds tighter than `|` in R; the parentheses spell out that grouping.
  ifelse(
    is.na(text),
    FALSE,
    (has("http") & !has("://")) |
      has("login") |
      has("ssl") |
      has("tls") |
      (has("cert") & !has("certain"))
  )
}
|
||||
#by the time C. Scott Ananian is committing he is employed
|
||||
# Read the commit-level CSV into a data frame.
core_csv <- "~/121325_work/121225_vd_data/core_2010-01-01_to_2024-12-31.csv"
core_df <- read.csv(core_csv, header = TRUE)

# E-mail addresses kept for reference; not used by the code visible here.
known_affil_emails <- c(
  "krinkle@fastmail.com",
  "roan.kattouw@gmail.com",
  "trevorparscal@gmail.com",
  "krinklemail@gmail.com",
  "moriel@gmail.com"
)

# Author names that are labelled "NAMES" regardless of their e-mail address.
active_names <- c(
  "Timo Tijhof", "Krinkle",
  "Roan Kattouw", "Catrope",
  "Trevor Parscal",
  "Ed Sanders",
  "Moriel Schottlender",
  "Gabriel Wicke",
  "C. Scott Ananian"
)

# Annotate every commit:
#   commit_date   - parsed to a date-time with ymd_hms()
#   isAuthorWMF   - character label; the first matching rule wins
#   isVE          - TRUE when the message mentions "VisualEditor" or " VE "
#   code_location - label derived from keywords in diff_info; first match wins
core_df <- core_df |>
  mutate(
    commit_date = ymd_hms(commit_date),
    isAuthorWMF = case_when(
      author_name %in% active_names ~ "NAMES",
      grepl("@wikimedia\\.org", author_email, ignore.case = TRUE) |
        grepl("@wikimedia\\.de", author_email, ignore.case = TRUE) ~ "TRUE",
      grepl("l10n-bot@translatewiki\\.net", author_email, ignore.case = TRUE) ~
        "localization",
      grepl("@gerrit\\.wikimedia\\.org", author_email, ignore.case = TRUE) ~
        "Gerrit",
      TRUE ~ "FALSE"
    ),
    # grepl() never returns NA, so a plain OR equals the original case_when.
    isVE = grepl("VisualEditor", message, ignore.case = TRUE) |
      grepl(" VE ", message, ignore.case = TRUE),
    code_location = case_when(
      grepl("login", diff_info, ignore.case = TRUE) ~ "login_in_location",
      grepl("auth", diff_info, ignore.case = TRUE) ~ "auth_in_location",
      grepl("security", diff_info, ignore.case = TRUE) ~ "security",
      TRUE ~ "Other"
    )
  )
|
||||
|
||||
# VisualEditor-related commits from week -33 through week 13 (inclusive)
# relative to c1_event_date, excluding the jenkins-bot Gerrit account.
c1_relevant <- core_df |>
  mutate(week_index = relative_week(commit_date, c1_event_date)) |>
  filter(
    week_index >= -33,
    week_index <= 13,
    author_email != "jenkins-bot@gerrit.wikimedia.org",
    isVE
  )
|
||||
|
||||
|
||||
# Number of commits per isAuthorWMF label for commits dated 2012-11-11 through
# 2015-10-02 (both days inclusive).
#
# commit_date is a date-time (POSIXct, from ymd_hms()), while the window bounds
# are Dates. Comparing the two classes directly makes R fall back to comparing
# the underlying numbers (seconds vs. days, with an "Incompatible methods"
# warning), so the window would not be applied as intended. Reduce the
# timestamp to a calendar date first so both sides share a class.
total_share <- core_df |>
  filter(
    as.Date(commit_date) >= as.Date("2012-11-11"),
    as.Date(commit_date) <= as.Date("2015-10-02")
  ) |>
  group_by(isAuthorWMF) |>
  summarise(count = n(), .groups = "drop")
|
||||
# Manual adjustment: move 15 from NAMES to FALSE within this result, for the 15 commits that C. Scott Ananian made prior to his employment
|
||||
|
||||
|
||||
@ -105,15 +105,15 @@ core_commits_created <- ggplot(
|
||||
geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.5) +
|
||||
geom_text(
|
||||
data = subset(core_counts, source == "c1" & week_index == 6),
|
||||
aes(x=week_index, y=120, label='Opt-out deployment'),
|
||||
aes(x=week_index, y=10, label='Opt-out deployment'),
|
||||
size = 2.5) +
|
||||
geom_text(
|
||||
data = subset(core_counts, source == "c1" & week_index == -33),
|
||||
aes(x=week_index, y=120, label='Opt-in Testing'),
|
||||
aes(x=week_index, y=10, label='Opt-in Testing'),
|
||||
size = 2.5) +
|
||||
geom_text(
|
||||
data = subset(core_counts, source == "c2" & week_index == -12),
|
||||
aes(x=week_index, y=20, label='Deployment Announcement'),
|
||||
aes(x=week_index, y=10, label='Deployment Announcement'),
|
||||
size = 2.5) +
|
||||
theme_minimal() +
|
||||
scale_fill_viridis_d() +
|
||||
|
||||
@ -1,72 +0,0 @@
|
||||
library(tidyverse)
|
||||
library(dplyr)
|
||||
library(lubridate)
|
||||
c1_event_date <- as.Date("2013-07-01")
|
||||
c2_event_date <- as.Date("2013-08-28")
|
||||
c3_event_date <- as.Date("2015-07-02")
|
||||
# Number of complete weeks between `ref_date` and `date` (floor division of the
# day difference by 7). Negative for dates before `ref_date`; returns integers.
relative_week <- function(date, ref_date) {
  day_gap <- difftime(date, ref_date, units = "days")
  week_number <- as.numeric(day_gap) %/% 7
  as.integer(week_number)
}
|
||||
|
||||
# Read the commit-level CSV into a data frame.
core_csv <- "~/121325_work/121225_vd_data/extension_VisualEditor_2000-01-01_to_2016-12-31.csv"
core_df <- read.csv(core_csv, header = TRUE)

# E-mail addresses kept for reference; not used by the code visible here.
known_affil_emails <- c(
  "krinkle@fastmail.com",
  "roan.kattouw@gmail.com",
  "trevorparscal@gmail.com",
  "krinklemail@gmail.com",
  "moriel@gmail.com"
)

# Author names that are labelled "FIVE" regardless of their e-mail address.
active_names <- c(
  "Timo Tijhof", "Krinkle",
  "Roan Kattouw", "Catrope",
  "Trevor Parscal",
  "Ed Sanders"
)

# Annotate every commit:
#   commit_date - parsed to a date-time with ymd_hms()
#   isAuthorWMF - character label; the first matching rule wins
#   isVE        - TRUE when the message mentions "VisualEditor" or " VE "
core_df <- core_df |>
  mutate(
    commit_date = ymd_hms(commit_date),
    isAuthorWMF = case_when(
      author_name %in% active_names ~ "FIVE",
      grepl("@wikimedia\\.org", author_email, ignore.case = TRUE) |
        grepl("@wikimedia\\.de", author_email, ignore.case = TRUE) ~ "TRUE",
      grepl("l10n-bot@translatewiki\\.net", author_email, ignore.case = TRUE) ~
        "localization",
      grepl("@gerrit\\.wikimedia\\.org", author_email, ignore.case = TRUE) ~
        "Gerrit",
      TRUE ~ "FALSE"
    ),
    # grepl() never returns NA, so a plain OR equals the original case_when.
    isVE = grepl("VisualEditor", message, ignore.case = TRUE) |
      grepl(" VE ", message, ignore.case = TRUE)
  )
|
||||
|
||||
# Count commits per (week_index, isAuthorWMF), where week_index is the week
# offset of commit_date from `event_date`, keep the weeks in
# [first_week, last_week] (both inclusive) and tag the rows with `label`.
weekly_author_counts <- function(df, event_date, first_week, last_week, label) {
  df |>
    mutate(week_index = relative_week(commit_date, event_date)) |>
    group_by(week_index, isAuthorWMF) |>
    summarise(count = n(), .groups = "drop") |>
    filter(week_index >= first_week & week_index <= last_week) |>
    mutate(source = label)
}

# Total commits per isAuthorWMF label across all weeks of one weekly table.
total_by_author <- function(weekly) {
  weekly |>
    group_by(isAuthorWMF) |>
    summarize(total = sum(count))
}

# One weekly table per event date. week_index is an integer, so the c1 window
# "-9 <= week < -4" is written as the inclusive range -9..-5.
c1_core_weekly <- weekly_author_counts(core_df, c1_event_date, -9, -5, "c1")
c2_core_weekly <- weekly_author_counts(core_df, c2_event_date, -104, 13, "c2")
c3_core_weekly <- weekly_author_counts(core_df, c3_event_date, -83, 13, "c3")

#collate and save
core_weekly <- bind_rows(c1_core_weekly, c2_core_weekly, c3_core_weekly)

# Per-label totals for each window (c1summary was previously computed twice
# with identical code; it is now computed once).
c1summary <- total_by_author(c1_core_weekly)
c2summary <- total_by_author(c2_core_weekly)
c3summary <- total_by_author(c3_core_weekly)
|
||||
|
||||
|
||||
@ -36,7 +36,7 @@ ve_counts <- ve_df |>
|
||||
# Keep only rows from the "c1" source whose author label is not "Gerrit".
ve_counts <- ve_counts |>
  filter(
    isAuthorWMF != "Gerrit",
    source == "c1"
  )
|
||||
|
||||
library(ggplot2)
|
||||
ve_new_commits_created <- ggplot(
|
||||
ve_counts,
|
||||
aes(
|
||||
|
||||
18
mgaughan-rstudio-server_32164721.out
Normal file
18
mgaughan-rstudio-server_32164721.out
Normal file
@ -0,0 +1,18 @@
|
||||
1. SSH tunnel from your workstation using the following command:
|
||||
|
||||
ssh -N -L 8787:n3439:57601 mjilg@klone.hyak.uw.edu
|
||||
|
||||
and point your web browser to http://localhost:8787
|
||||
|
||||
2. log in to RStudio Server using the following credentials:
|
||||
|
||||
user: mjilg
|
||||
password: OmvStzwArWC2NNHj/j8p
|
||||
|
||||
When done using RStudio Server, terminate the job by:
|
||||
|
||||
1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window)
|
||||
2. Issue the following command on the login node:
|
||||
|
||||
scancel -f 32164721
|
||||
[2026-01-06T14:05:14.031] error: *** JOB 32164721 ON n3439 CANCELLED AT 2026-01-06T14:05:14 DUE TO TIME LIMIT ***
|
||||
Loading…
Reference in New Issue
Block a user