1
0

updating the commit data analysis

This commit is contained in:
Matthew Gaughan 2025-04-13 17:07:19 -07:00
parent 3f594bf462
commit 6f61e08376
2 changed files with 22 additions and 10 deletions

View File

@ -5,7 +5,7 @@ library(tidyr)
library(purrr)
library(stringr)
ve_commit_fp <- "/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case2/mediawiki_core_commits.csv"
https_commit_fp <- "/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case2/mediawiki_core_commits.csv"
contains_http_but_not_url <- function(text) {
if (is.na(text)) {
@ -14,11 +14,23 @@ contains_http_but_not_url <- function(text) {
# Split text by whitespace and check each word
words <- str_split(text, "\\s+")[[1]]
for (word in words) {
if (word == "http" || word == "https") {
return(TRUE)
}
if (str_detect(word, "http") && !str_detect(word, "^https?://") && !str_detect(word, "^http?://")) {
return(TRUE)
if (!str_detect(word,"://")){
#http
if (str_detect(word, "http")){
return(TRUE)
}
if (str_detect(word, "login")){
return(TRUE)
}
if (str_detect(word, "ssl")){
return(TRUE)
}
if (str_detect(word, "tls")){
return(TRUE)
}
if (startsWith(word, "cert")){
return(TRUE)
}
}
}
return(FALSE)
@ -343,8 +355,8 @@ transform_relevant_commit_data <- function(filepath){
return(weekly_commits)
}
transformed <- transform_relevant_commit_data(ve_commit_fp)
output_filepath <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case2/relevant_event_0404_mediawiki_core_weekly_commit_count_data.csv"
transformed <- transform_relevant_commit_data(https_commit_fp)
output_filepath <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case2/relevant_event_0413_mediawiki_core_weekly_commit_count_data.csv"
write.csv(transformed, output_filepath, row.names = FALSE)

View File

@ -1,5 +1,5 @@
library(tidyverse)
count_data_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case2/relevant_event_0404_mediawiki_core_weekly_commit_count_data.csv"
count_data_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case2/relevant_event_0413_mediawiki_core_weekly_commit_count_data.csv"
input_df <- read.csv(count_data_fp, header = TRUE)
input_df$nonbot_commit_count <- input_df$commit_count - input_df$bot_commit_count
@ -72,7 +72,7 @@ commit_share_plot <- share_long |>
geom_point() +
labs(x = "Relative Week", y = "Share of Nonbot Commits", color="Commit Author Affiliation") +
scale_color_discrete(labels = c("Unaffiliated", "Organizationally Affiliated")) +
ggtitle("MW-core Nonbot Commit Share Around HTTPS-as-default") +
ggtitle("MW-core Nonbot 'relevant' Commit Share Around HTTPS-as-default") +
theme_bw() +
theme(legend.position = "top")
commit_share_plot