diff --git a/commit_analysis/commit_count_collation.R b/commit_analysis/commit_count_collation.R index c73f853..cc69f25 100644 --- a/commit_analysis/commit_count_collation.R +++ b/commit_analysis/commit_count_collation.R @@ -5,7 +5,7 @@ library(tidyr) library(purrr) library(stringr) -ve_commit_fp <- "/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case2/mediawiki_core_commits.csv" +https_commit_fp <- "/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case2/mediawiki_core_commits.csv" contains_http_but_not_url <- function(text) { if (is.na(text)) { @@ -14,11 +14,23 @@ contains_http_but_not_url <- function(text) { # Split text by whitespace and check each word words <- str_split(text, "\\s+")[[1]] for (word in words) { - if (word == "http" || word == "https") { - return(TRUE) - } - if (str_detect(word, "http") && !str_detect(word, "^https?://") && !str_detect(word, "^http?://")) { - return(TRUE) + if (!str_detect(word,"://")){ + #http + if (str_detect(word, "http")){ + return(TRUE) + } + if (str_detect(word, "login")){ + return(TRUE) + } + if (str_detect(word, "ssl")){ + return(TRUE) + } + if (str_detect(word, "tls")){ + return(TRUE) + } + if (startsWith(word, "cert")){ + return(TRUE) + } } } return(FALSE) @@ -343,8 +355,8 @@ transform_relevant_commit_data <- function(filepath){ return(weekly_commits) } -transformed <- transform_relevant_commit_data(ve_commit_fp) -output_filepath <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case2/relevant_event_0404_mediawiki_core_weekly_commit_count_data.csv" +transformed <- transform_relevant_commit_data(https_commit_fp) +output_filepath <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case2/relevant_event_0413_mediawiki_core_weekly_commit_count_data.csv" write.csv(transformed, output_filepath, row.names = FALSE) diff --git a/commit_analysis/commit_plotting.R b/commit_analysis/commit_plotting.R index b1a2472..7506041 100644 --- a/commit_analysis/commit_plotting.R +++ b/commit_analysis/commit_plotting.R @@ -1,5 +1,5 @@ library(tidyverse) -count_data_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case2/relevant_event_0404_mediawiki_core_weekly_commit_count_data.csv" +count_data_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case2/relevant_event_0413_mediawiki_core_weekly_commit_count_data.csv" input_df <- read.csv(count_data_fp, header = TRUE) input_df$nonbot_commit_count <- input_df$commit_count - input_df$bot_commit_count @@ -72,7 +72,7 @@ commit_share_plot <- share_long |> geom_point() + labs(x = "Relative Week", y = "Share of Nonbot Commits", color="Commit Author Affiliation") + scale_color_discrete(labels = c("Unaffiliated", "Organizationally Affiliated")) + - ggtitle("MW-core Nonbot Commit Share Around HTTPS-as-default") + + ggtitle("MW-core Nonbot 'relevant' Commit Share Around HTTPS-as-default") + theme_bw() + theme(legend.position = "top") commit_share_plot