diff --git a/.sh_history b/.sh_history index 5a5200f..b9bfa4a 100644 --- a/.sh_history +++ b/.sh_history @@ -151,3 +151,20 @@ lds ls cd ../case2 ls +ls /gscratch/comdata/users/mjilg/mw-repo-lifecycles/commit_data/bot_frameworks +cd /gscratch/comdata/users/mjilg/mw-repo-lifecycles/commit_data/bot_frameworks +ls +cd .. +ls +mkdir php_and_python_bots +rm -r php_and_python_bots +ls +ls bot_frameworks +cd /mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/ +ls +rm event_0430_framework_commit_counts.csv +ls +cd .. +ls +cd case2 +ls diff --git a/commit_analysis/bot-framework-commits.R b/commit_analysis/bot-framework-commits.R new file mode 100644 index 0000000..742e310 --- /dev/null +++ b/commit_analysis/bot-framework-commits.R @@ -0,0 +1,67 @@ +library(tidyverse) +count_data_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/event_0430_framework_commit_counts.csv" +input_df <- read.csv(count_data_fp, header = TRUE) + +input_df$nonbot_commit_count <- input_df$commit_count - input_df$bot_commit_count +input_df$unaff_commit_count <- input_df$nonbot_commit_count - input_df$wikimedia_commit_count + + +library(scales) +library(ggplot2) + +long_df <- input_df |> + tidyr::pivot_longer(cols = c(unaff_commit_count, wikimedia_commit_count), + names_to = "commit_type", + values_to = "lengthened_commit_count") + +summed_df <- long_df |> + group_by(relative_week, project, commit_type) |> + summarize(total_commits = sum(lengthened_commit_count), .groups = 'drop') + +affiliationColors <- + setNames( c('#5da2d8', '#c7756a') + ,c("unaff_commit_count", "wikimedia_commit_count")) + +commit_authors <- summed_df |> + ggplot(aes(x=relative_week, + y=total_commits, + fill=factor(commit_type))) + + geom_col(position='dodge') + + labs(x = "Relative Week", y = "Commits", fill="Commit Type") + + scale_fill_manual(values = affiliationColors, + labels = c("unaff_commit_count" = "Unaffiliated Commits", + "wikimedia_commit_count" = "WMF Commits")) + + ggtitle("Weekly Commits to Bot Frameworks During VE Deployment") + + theme_bw() + + theme(legend.position = "top") +commit_authors + +ggsave(filename = "ww-c1-0430-bot-commits.png", plot = commit_authors, width = 12, height = 9, dpi = 800) + + +input_df$returning_unaff_commit_count = input_df$unaff_commit_count - input_df$unaff_new_commit_count + +new_authors_long_df <- input_df |> + filter(relative_week >= -4) |> + tidyr::pivot_longer(cols = c(unaff_new_commit_count, returning_unaff_commit_count), + names_to = "commit_seniority", + values_to = "lengthened_commit_count") + +new_unaff_authors <- new_authors_long_df |> + ggplot(aes(x=relative_week, + y=lengthened_commit_count, + fill=commit_seniority)) + + geom_col(position='dodge') + + labs(x = "Relative Week", y = "Commits", fill="Commit Seniority ('New' contributors <= 5 commits before 06-06-2013)") + + scale_fill_manual(values = c("returning_unaff_commit_count" = "#FFC107", # Color for "Returning Contributors" + "unaff_new_commit_count" = "#004D40"), + labels = c("returning_unaff_commit_count" = "Returning Contributors", + "unaff_new_commit_count" = "New Contributors") + ) + + ggtitle("Unaffiliated Bot Framework Commits Surrounding VE Deployment") + + theme_bw() + + theme(legend.position = "top") + +new_unaff_authors + +ggsave(filename = "ww-c1-0430-bot-spike.png", plot = new_unaff_authors, width = 12, height = 9, dpi = 800) diff --git a/commit_analysis/framework_commit_collation.R b/commit_analysis/framework_commit_collation.R index 18382a7..36c516c 100644 --- a/commit_analysis/framework_commit_collation.R +++ b/commit_analysis/framework_commit_collation.R @@ -20,9 +20,9 @@ all_data <- csv_files %>% map_df(read_and_label) # TODO: this is project/event specific -event_date <- as.Date("2013-07-01") -#event_date <- as.Date("2013-04-25") -#event_date <- as.Date("2012-12-11") +#event_date <- as.Date("2013-07-01") +#event_date <- as.Date("2013-08-28") +event_date <- as.Date("2015-07-02") df <- all_data |> mutate(commit_date = ymd_hms(commit_date)) @@ -33,27 +33,40 @@ df <- df %>% ungroup() %>% mutate(age = as.numeric(as.Date("2025-02-10") - oldest_commit_date)) -filtered_df <- df %>% +#discard projects who are created after the focal event date +df <- df %>% group_by(project) %>% filter(min(as.Date(commit_date)) <= event_date) %>% ungroup() -calculated_start_date <- event_date %m-% months(12) -start_date <- max(calculated_start_date, df$oldest_commit_date) -end_date <- event_date %m+% months(12) +#calculated_start_date <- event_date %m-% months(12) +#%start_date <- max(calculated_start_date, df$oldest_commit_date) +#end_date <- event_date %m+% months(12) #getting the relative weeks to the publication date relative_week <- function(date, ref_date) { as.integer(as.numeric(difftime(date, ref_date, units = "days")) %/% 7) } -filtered_df <- filtered_df |> +old_author_list <- df |> + filter(commit_date > as.Date("2013-07-01") & commit_date < as.Date("2015-06-12"))|> + group_by(author_email) |> + summarise(commit_count = n()) |> + filter(commit_count > 5) |> + pull(author_email) + +df <- df |> + mutate(new_author = ifelse(author_email %in% old_author_list, 0, 1), + new_author_wmf = if_else(grepl("@wikimedia", author_email), + new_author, 0), + new_author_unaff = if_else(!grepl("@wikimedia", author_email) & + !grepl("l10n-bot@translatewiki.net|tools.libraryupgrader@tools.wmflabs.org", author_email), + new_author, 0)) + +filtered_df <- df |> mutate(relative_week = relative_week(commit_date, event_date)) |> arrange(relative_week) |> - group_by(author_email) |> - mutate(new_author = ifelse(row_number() <= 5, 1, 0), - new_author_wmf = if_else(grepl("@wikimedia", author_email), new_author, 0), - new_author_unaff = if_else(!grepl("@wikimedia", author_email), new_author, 0)) |> + filter(relative_week >= (-104) & relative_week <= 13) |> ungroup() @@ -66,8 +79,8 @@ weekly_commits <- filtered_df |> wikimedia_commit_count = sum(grepl("@wikimedia", author_email)), wikia_commit_count = sum(grepl("@wikia-inc.com", author_email)), bot_commit_count = sum(grepl("l10n-bot@translatewiki.net|tools.libraryupgrader@tools.wmflabs.org", author_email)), - wmf_ft_commit_count = sum(new_author_wmf), - unaff_ft_commit_count = sum(new_author_unaff), + wmf_new_commit_count = sum(new_author_wmf), + unaff_new_commit_count = sum(new_author_unaff), .groups = 'drop') |> replace_na(list(commit_count = 0)) |> replace_na(list(wikimedia_commit_count = 0)) |> @@ -81,10 +94,10 @@ weekly_commits <- filtered_df |> select(-author_emails, -committer_emails) -weekly_commits <- weekly_commits |> - filter(relative_week >= (-52) & relative_week <= 52 ) +#weekly_commits <- weekly_commits |> +# filter(relative_week >= (-52) & relative_week <= 52 ) weekly_commits -output_filepath <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/event_0314_bot_frameworks_weekly_commit_count_data.csv" +output_filepath <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case3/event_0430_framework_commit_counts.csv" write.csv(weekly_commits, output_filepath, row.names = FALSE) \ No newline at end of file diff --git a/mgaughan-rstudio-server_25646212.out b/mgaughan-rstudio-server_25646212.out new file mode 100644 index 0000000..b0045bf --- /dev/null +++ b/mgaughan-rstudio-server_25646212.out @@ -0,0 +1,17 @@ +1. SSH tunnel from your workstation using the following command: + + ssh -N -L 8787:n3439:35643 mjilg@klone.hyak.uw.edu + + and point your web browser to http://localhost:8787 + +2. log in to RStudio Server using the following credentials: + + user: mjilg + password: 02hEI/v9ULdd5eUb4E2f + +When done using RStudio Server, terminate the job by: + +1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window) +2. Issue the following command on the login node: + + scancel -f 25646212 diff --git a/ww-c1-0430-bot-commits.png b/ww-c1-0430-bot-commits.png new file mode 100644 index 0000000..be36bed Binary files /dev/null and b/ww-c1-0430-bot-commits.png differ diff --git a/ww-c1-0430-bot-spike.png b/ww-c1-0430-bot-spike.png new file mode 100644 index 0000000..ae647e5 Binary files /dev/null and b/ww-c1-0430-bot-spike.png differ diff --git a/ww-c2-0430-bot-spike.png b/ww-c2-0430-bot-spike.png new file mode 100644 index 0000000..ac8cbdc Binary files /dev/null and b/ww-c2-0430-bot-spike.png differ diff --git a/ww-figures/ww-c2-0430-bot-commits.png b/ww-figures/ww-c2-0430-bot-commits.png new file mode 100644 index 0000000..a7ab016 Binary files /dev/null and b/ww-figures/ww-c2-0430-bot-commits.png differ diff --git a/ww-figures/ww-c3-0430-bot-spike.png b/ww-figures/ww-c3-0430-bot-spike.png new file mode 100644 index 0000000..59f3c2f Binary files /dev/null and b/ww-figures/ww-c3-0430-bot-spike.png differ