diff --git a/.sh_history b/.sh_history index 787e8e6..83dcc5b 100644 --- a/.sh_history +++ b/.sh_history @@ -65,3 +65,12 @@ cd case1 ls mv event_0314_bot_frameworks_weekly_commit_count_data.csv en-testing_0314_bot_frameworks_weekly_commit_count_data.csv ls +cd .. +cd case2 +ls +mv core_2010-01-01_to_2024-12-31.csv mediawiki_core_commits.csv +mv mediawiki-config_2010-01-01_to_2024-12-31.csv mediawiki_wmfconfig_commits.csv +ls +rm event_0403_mediawiki_core_weekly_commit_count_data.csv +rm event_0403_mediawiki_wmfconfig_weekly_commit_count_data.csv +ls diff --git a/0403-https-core-event-new-commits.png b/0403-https-core-event-new-commits.png new file mode 100644 index 0000000..2bf06b7 Binary files /dev/null and b/0403-https-core-event-new-commits.png differ diff --git a/0316-bot-frameworks-commits-event.png b/commit_analysis/case1/0316-bot-frameworks-commits-event.png similarity index 100% rename from 0316-bot-frameworks-commits-event.png rename to commit_analysis/case1/0316-bot-frameworks-commits-event.png diff --git a/0401-bot-frameworks-commits-event.png b/commit_analysis/case1/0401-bot-frameworks-commits-event.png similarity index 100% rename from 0401-bot-frameworks-commits-event.png rename to commit_analysis/case1/0401-bot-frameworks-commits-event.png diff --git a/0401-bot-frameworks-commits-testing.png b/commit_analysis/case1/0401-bot-frameworks-commits-testing.png similarity index 100% rename from 0401-bot-frameworks-commits-testing.png rename to commit_analysis/case1/0401-bot-frameworks-commits-testing.png diff --git a/commit_analysis/commit_count_collation.R b/commit_analysis/commit_count_collation.R index 737baae..38d3e8b 100644 --- a/commit_analysis/commit_count_collation.R +++ b/commit_analysis/commit_count_collation.R @@ -4,7 +4,7 @@ library(lubridate) library(tidyr) library(purrr) -ve_commit_fp <- "/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/mediawiki_core_commits.csv" +ve_commit_fp <- "/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case2/mediawiki_wmfconfig_commits.csv" transform_commit_data <- function(filepath){ #basic, loading in the file @@ -15,7 +15,8 @@ transform_commit_data <- function(filepath){ # TODO: this is project/event specific - event_date <- as.Date("2013-07-01") + event_date <- as.Date("2013-08-28") + #event_date <- as.Date("2013-07-01") #event_date <- as.Date("2013-04-25") #event_date <- as.Date("2012-12-11") @@ -61,7 +62,7 @@ transform_commit_data <- function(filepath){ # list all author_emails with >5 commits # for big df: if author not in the list, 'new' author old_author_list <- df |> - filter(commit_date < as.Date("2013-06-01"))|> + filter(commit_date < as.Date("2013-08-01"))|> group_by(author_email) |> summarise(commit_count = n()) |> filter(commit_count > 5) |> @@ -164,7 +165,7 @@ transform_commit_data <- function(filepath){ } transformed <- transform_commit_data(ve_commit_fp) -output_filepath <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/event_0401_mediawiki_core_weekly_commit_count_data.csv" +output_filepath <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case2/event_0403_mediawiki_wmfconfig_weekly_commit_count_data.csv" write.csv(transformed, output_filepath, row.names = FALSE) diff --git a/commit_analysis/commit_plotting.R b/commit_analysis/commit_plotting.R index f3dfc04..60fa4c9 100644 --- a/commit_analysis/commit_plotting.R +++ b/commit_analysis/commit_plotting.R @@ -1,5 +1,5 @@ library(tidyverse) -count_data_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/event_0401_mediawiki_core_weekly_commit_count_data.csv" +count_data_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case2/event_0403_mediawiki_core_weekly_commit_count_data.csv" input_df <- read.csv(count_data_fp, header = TRUE) input_df$nonbot_commit_count <- input_df$commit_count - input_df$bot_commit_count @@ -27,13 +27,12 @@ new_authors <- long_df |> labels = c("nonbot_commit_count" = "Total Nonbot Commits", "unaff_new_commit_count" = "New Unaffiliated Commits", "wmf_new_commit_count" = "New WMF Commits")) + - ggtitle("MW-core Commits Around Wide Release ('New' contributors <= 5 commits before 06-01-2013)") + + ggtitle("MW-core Commits Around HTTPS as-default ('New' contributors <= 5 commits before 08-01-2013)") + theme_bw() + theme(legend.position = "top") new_authors -ggsave(filename = "0401-core-event-new-commits.png", plot = new_authors, width = 12, height = 9, dpi = 800) - +ggsave(filename = "0403-https-core-event-new-commits.png", plot = new_authors, width = 12, height = 9, dpi = 800) window_num <- 12 @@ -73,7 +72,7 @@ commit_share_plot <- share_long |> geom_point() + labs(x = "Relative Week", y = "Share of Nonbot Commits", color="Commit Author Affiliation") + scale_color_discrete(labels = c("Unaffiliated", "Organizationally Affiliated")) + - ggtitle("VisualEditor Nonbot Commit Share Around Opt-out Deployment") + + ggtitle("MW-core Nonbot Commit Share Around HTTPS-as-default") + theme_bw() + theme(legend.position = "top") commit_share_plot diff --git a/commit_analysis/testing-share-plotting.R b/commit_analysis/testing-share-plotting.R index b5a2c07..d9c318b 100644 --- a/commit_analysis/testing-share-plotting.R +++ b/commit_analysis/testing-share-plotting.R @@ -48,18 +48,18 @@ affiliationColors <- setNames( c('black','#5da2d8', '#c7756a') ,c("nonbot_commit_count","other_commit_count", "wikimedia_commit_count")) +# linetype = rd_event commit_plot <- final_long_df |> ggplot(aes(x=relative_week, y=total_commit_count, - color=factor(commit_type), - linetype = rd_event)) + + color=factor(commit_type))) + geom_line() + geom_point() + labs(x = "Relative Week", y = "Commits", linetype = "Testing Event", color="Commit Author Affiliation") + scale_color_manual(values = affiliationColors, labels = c("other_commit_count" = "Unaffiliated", "wikimedia_commit_count" = "WMF Affiliated")) + - scale_linetype_discrete(labels = c("enwiki testing (2012-12-11)", "wide testing (2013-04-25)")) + +# scale_linetype_discrete(labels = c("enwiki testing (2012-12-11)", "wide testing (2013-04-25)")) + ggtitle("bot-frameworks Commits Around Wide Release (by Affiliation)") + theme_bw() + theme(legend.position = "top") diff --git a/mgaughan-rstudio-server_25101978.out b/mgaughan-rstudio-server_25101978.out deleted file mode 100644 index 31173fa..0000000 --- a/mgaughan-rstudio-server_25101978.out +++ /dev/null @@ -1,18 +0,0 @@ -1. SSH tunnel from your workstation using the following command: - - ssh -N -L 8787:n3439:60937 mjilg@klone.hyak.uw.edu - - and point your web browser to http://localhost:8787 - -2. log in to RStudio Server using the following credentials: - - user: mjilg - password: Uoe+nDeFvcaRiOows2lT - -When done using RStudio Server, terminate the job by: - -1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window) -2. Issue the following command on the login node: - - scancel -f 25101978 -slurmstepd: error: *** JOB 25101978 ON n3439 CANCELLED AT 2025-04-01T10:12:30 DUE TO TIME LIMIT *** diff --git a/mgaughan-rstudio-server_25104883.out b/mgaughan-rstudio-server_25104883.out deleted file mode 100644 index 3b407df..0000000 --- a/mgaughan-rstudio-server_25104883.out +++ /dev/null @@ -1,17 +0,0 @@ -1. SSH tunnel from your workstation using the following command: - - ssh -N -L 8787:n3439:60911 mjilg@klone.hyak.uw.edu - - and point your web browser to http://localhost:8787 - -2. log in to RStudio Server using the following credentials: - - user: mjilg - password: EKa4WB0aZn3MeC3Huz21 - -When done using RStudio Server, terminate the job by: - -1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window) -2. Issue the following command on the login node: - - scancel -f 25104883