diff --git a/.sh_history b/.sh_history index b9bfa4a..2e0c6b5 100644 --- a/.sh_history +++ b/.sh_history @@ -168,3 +168,5 @@ cd .. ls cd case2 ls +ls ../case1 +ls ../case3 diff --git a/commit_analysis/commit_plotting.R b/commit_analysis/plotting/commit_plotting.R similarity index 100% rename from commit_analysis/commit_plotting.R rename to commit_analysis/plotting/commit_plotting.R diff --git a/commit_analysis/relevance-plot.R b/commit_analysis/plotting/relevance-plot.R similarity index 100% rename from commit_analysis/relevance-plot.R rename to commit_analysis/plotting/relevance-plot.R diff --git a/commit_analysis/testing-share-plotting.R b/commit_analysis/plotting/testing-share-plotting.R similarity index 100% rename from commit_analysis/testing-share-plotting.R rename to commit_analysis/plotting/testing-share-plotting.R diff --git a/commit_analysis/plotting/ww-bots-plot-script.R b/commit_analysis/plotting/ww-bots-plot-script.R new file mode 100644 index 0000000..a254bd2 --- /dev/null +++ b/commit_analysis/plotting/ww-bots-plot-script.R @@ -0,0 +1,94 @@ +library(tidyverse) + +c1_bots <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/event_0430_framework_commit_counts.csv" +c1_input_df <- read.csv(c1_bots , header = TRUE) + +c2_bots <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case2/event_0430_framework_commit_counts.csv" +c2_input_df <- read.csv(c2_bots , header = TRUE) + +c3_bots <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case3/event_0430_framework_commit_counts.csv" +c3_input_df <- read.csv(c3_bots , header = TRUE) + +# Add a column to each dataframe to label them +c1_input_df <- c1_input_df %>% mutate(source = "c1") +c2_input_df <- c2_input_df %>% mutate(source = "c2") +c3_input_df <- c3_input_df %>% mutate(source = "c3") + +# Combine the dataframes into one +combined_df <- bind_rows(c1_input_df, c2_input_df, c3_input_df) + +combined_df$nonbot_commit_count <- combined_df$commit_count - combined_df$bot_commit_count +combined_df$unaff_commit_count <- combined_df$nonbot_commit_count - combined_df$wikimedia_commit_count + +library(scales) +library(ggplot2) + +long_df <- combined_df |> + tidyr::pivot_longer(cols = c(unaff_commit_count, wikimedia_commit_count), + names_to = "commit_type", + values_to = "lengthened_commit_count") + +summed_df <- long_df |> + group_by(relative_week, project, commit_type) |> + summarize(total_commits = sum(lengthened_commit_count), .groups = 'drop') + +combined_df$returning_unaff_commit_count = combined_df$unaff_commit_count - combined_df$unaff_new_commit_count + +new_authors_long_df <- combined_df |> + tidyr::pivot_longer(cols = c(unaff_new_commit_count, returning_unaff_commit_count), + names_to = "commit_seniority", + values_to = "lengthened_commit_count") + + +new_unaff_authors <- new_authors_long_df |> + ggplot(aes(x=relative_week, + y=lengthened_commit_count, + fill=commit_seniority)) + + geom_col(position='dodge') + + labs(x = "Relative Week", y = "Commits", fill="Contributor Tenure (New contributors <= 5 commits before deployment announcement)") + + geom_vline(data = long_df |> filter(source == "c1"), + aes(xintercept = -30), + linetype = "dotted", color = "black", linewidth = 1) + + geom_vline(data = long_df |> filter(source == "c1"), + aes(xintercept = -9), + linetype = "dotted", color = "black", linewidth = 1) + + geom_vline(data = long_df |> filter(source == "c2"), + aes(xintercept = -99), + linetype = "dotted", color = "black", linewidth = 1) + + geom_vline(data = long_df |> filter(source == "c3"), + aes(xintercept = -97), + linetype = "dotted", color = "black", linewidth = 1) + + geom_text(data = data.frame(source = "c1", relative_week = -40, lengthened_commit_count = 90), + aes(x = relative_week, y = lengthened_commit_count, label = "Opt-In Testing Deployment"), + inherit.aes = FALSE, color = "black", size = 4) + + geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 1) + # Add vertical line at week 0 + geom_text(data = data.frame(source = "c1", relative_week = 7, lengthened_commit_count = 90), + aes(x = relative_week, y = lengthened_commit_count, label = "Feature Deployment"), + inherit.aes = FALSE, color = "black", size = 4) + + scale_fill_manual(values = c("returning_unaff_commit_count" = "#FFC107", # Color for "Returning Contributors" + "unaff_new_commit_count" = "#004D40"), + labels = c("returning_unaff_commit_count" = "Returning Contributors", + "unaff_new_commit_count" = "New Contributors") + ) + + ggtitle("Unaffiliated Commits to Bot Framework Libraries During Feature Deployments") + + theme_bw() + + theme( + legend.position = "top", + plot.title = element_text(size = 24, face = "bold"), # Increase title font size + axis.title.x = element_text(size = 18), # Increase x-axis title font size + axis.title.y = element_text(size = 18), # Increase y-axis title font size + axis.text.x = element_text(size = 16), # Increase x-axis text font size + axis.text.y = element_text(size = 16), # Increase y-axis text font size + legend.text = element_text(size = 16), # Increase legend text font size + legend.title = element_text(size = 16), + strip.text = element_text(size = 14)# Increase legend title font size + ) + + facet_wrap(~source, nrow = 3, labeller = labeller(source = c( + "c1" = "VisualEditor", + "c2" = "HTTPS-as-default", + "c3" = "HTTP-deprecation" + ))) + +new_unaff_authors + +ggsave(filename = "ww-0501-bot-commits-faceted.png", plot = new_unaff_authors, width = 12, height = 9, dpi = 800) diff --git a/commit_analysis/plotting/ww-plot-script.R b/commit_analysis/plotting/ww-plot-script.R new file mode 100644 index 0000000..cce8c0c --- /dev/null +++ b/commit_analysis/plotting/ww-plot-script.R @@ -0,0 +1,83 @@ +library(tidyverse) + +c1_count <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/event_0421_extensions_ve_weekly_count.csv" +c1_input_df <- read.csv(c1_count , header = TRUE) + +c2_count <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case2/event_0430_mediawiki_core_weekly_count.csv" +c2_input_df <- read.csv(c2_count , header = TRUE) + +c3_count <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case3/event_0430_mediawiki_core_weekly_count.csv" +c3_input_df <- read.csv(c3_count , header = TRUE) + +# Add a column to each dataframe to label them +c1_input_df <- c1_input_df %>% mutate(source = "c1") +c2_input_df <- c2_input_df %>% mutate(source = "c2") +c3_input_df <- c3_input_df %>% mutate(source = "c3") + +# Combine the dataframes into one +combined_df <- bind_rows(c1_input_df, c2_input_df, c3_input_df) + +combined_df$nonbot_commit_count <- combined_df$commit_count - combined_df$bot_commit_count +combined_df$unaff_commit_count <- combined_df$nonbot_commit_count - combined_df$wikimedia_commit_count + +library(scales) +library(ggplot2) + +long_df <- combined_df |> + tidyr::pivot_longer(cols = c(unaff_commit_count, wikimedia_commit_count), + names_to = "commit_type", + values_to = "lengthened_commit_count") + +affiliationColors <- + setNames( c('#5da2d8', '#c7756a') + ,c("unaff_commit_count", "wikimedia_commit_count")) + +commit_authors <- long_df |> + ggplot(aes(x=relative_week, + y=lengthened_commit_count, + fill=factor(commit_type))) + + geom_col(position='dodge') + + labs(x = "Relative Week", y = "Commits", fill="Commit Type") + + geom_vline(data = long_df |> filter(source == "c1"), + aes(xintercept = -30), + linetype = "dotted", color = "black", linewidth = 1) + + geom_vline(data = long_df |> filter(source == "c1"), + aes(xintercept = -9), + linetype = "dotted", color = "black", linewidth = 1) + + geom_vline(data = long_df |> filter(source == "c2"), + aes(xintercept = -99), + linetype = "dotted", color = "black", linewidth = 1) + + geom_vline(data = long_df |> filter(source == "c3"), + aes(xintercept = -97), + linetype = "dotted", color = "black", linewidth = 1) + + geom_text(data = data.frame(source = "c1", relative_week = -40, lengthened_commit_count = 50), + aes(x = relative_week, y = lengthened_commit_count, label = "Opt-In Testing Deployment"), + inherit.aes = FALSE, color = "black", size = 4) + + geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 1) + # Add vertical line at week 0 + geom_text(data = data.frame(source = "c1", relative_week = 7, lengthened_commit_count = 50), + aes(x = relative_week, y = lengthened_commit_count, label = "Feature Deployment"), + inherit.aes = FALSE, color = "black", size = 4) + + scale_fill_manual(values = affiliationColors, + labels = c("unaff_commit_count" = "Unaffiliated Commits", + "wikimedia_commit_count" = "WMF Commits")) + + ggtitle("Feature Commits During Deployment Process") + + theme_bw()+ + theme( + legend.position = "top", + plot.title = element_text(size = 24, face = "bold"), # Increase title font size + axis.title.x = element_text(size = 18), # Increase x-axis title font size + axis.title.y = element_text(size = 18), # Increase y-axis title font size + axis.text.x = element_text(size = 16), # Increase x-axis text font size + axis.text.y = element_text(size = 16), # Increase y-axis text font size + legend.text = element_text(size = 16), # Increase legend text font size + legend.title = element_text(size = 16), + strip.text = element_text(size = 14)# Increase legend title font size + ) + + facet_wrap(~source, nrow = 3, labeller = labeller(source = c( + "c1" = "VisualEditor (commits to extensions/visualeditor)", + "c2" = "HTTPS-as-default (relevant commits to mediawiki/core)", + "c3" = "HTTP-deprecation (relevant commits to mediawiki/core)" + ))) +commit_authors + +ggsave(filename = "ww-0501-commits-faceted.png", plot = commit_authors, width = 12, height = 9, dpi = 800) diff --git a/mgaughan-rstudio-server_25646212.out b/mgaughan-rstudio-server_25646212.out deleted file mode 100644 index b0045bf..0000000 --- a/mgaughan-rstudio-server_25646212.out +++ /dev/null @@ -1,17 +0,0 @@ -1. SSH tunnel from your workstation using the following command: - - ssh -N -L 8787:n3439:35643 mjilg@klone.hyak.uw.edu - - and point your web browser to http://localhost:8787 - -2. log in to RStudio Server using the following credentials: - - user: mjilg - password: 02hEI/v9ULdd5eUb4E2f - -When done using RStudio Server, terminate the job by: - -1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window) -2. Issue the following command on the login node: - - scancel -f 25646212 diff --git a/ww-figures/ww-0501-bot-commits-faceted.png b/ww-figures/ww-0501-bot-commits-faceted.png new file mode 100644 index 0000000..9bcb513 Binary files /dev/null and b/ww-figures/ww-0501-bot-commits-faceted.png differ diff --git a/ww-figures/ww-0501-commits-faceted.png b/ww-figures/ww-0501-commits-faceted.png new file mode 100644 index 0000000..eb5b351 Binary files /dev/null and b/ww-figures/ww-0501-commits-faceted.png differ diff --git a/ww-c1-0430-bot-commits.png b/ww-figures/ww-c1-0430-bot-commits.png similarity index 100% rename from ww-c1-0430-bot-commits.png rename to ww-figures/ww-c1-0430-bot-commits.png diff --git a/ww-c1-0430-bot-spike.png b/ww-figures/ww-c1-0430-bot-spike.png similarity index 100% rename from ww-c1-0430-bot-spike.png rename to ww-figures/ww-c1-0430-bot-spike.png diff --git a/ww-c2-0430-bot-spike.png b/ww-figures/ww-c2-0430-bot-spike.png similarity index 100% rename from ww-c2-0430-bot-spike.png rename to ww-figures/ww-c2-0430-bot-spike.png