From 80c6e2ffba38d475ab17f20d3f687a260ab6f57d Mon Sep 17 00:00:00 2001
From: Matthew Gaughan
Date: Mon, 21 Apr 2025 11:52:30 -0700
Subject: [PATCH] stashing plots for CHASE presentation govdoc

---
Review notes (this section is ignored by git am):
- govdoc-cr-age-dist.R: stat_dots() is now called from ggdist; the original
  ggplot:: prefix names a namespace that does not exist. The blob index line
  for this file was dropped because the content changed.
- NOTE(review): this patch also commits .sh_history and an RStudio Server job
  log that contains a plaintext session password. Consider leaving both files
  out of version control and treating the password as exposed.

 .sh_history                          | 14 ++++++
 govdoc-cr-age-dist.R                 | 64 ++++++++++++++++++++++++++++
 mgaughan-rstudio-server_25494157.out | 17 ++++++++
 3 files changed, 95 insertions(+)
 create mode 100644 govdoc-cr-age-dist.R
 create mode 100644 mgaughan-rstudio-server_25494157.out

diff --git a/.sh_history b/.sh_history
index 0d31a35..557db77 100644
--- a/.sh_history
+++ b/.sh_history
@@ -108,3 +108,17 @@ mv event_0401_extensions_ve_weekly_commit_count_data.csv 042125_stale_commits/
 mv event_0401_extensions_ve_weekly_commit_count_data.csv 042125_stale_counts/
 mv event_0401_mediawiki_core_weekly_commit_count_data.csv 042125_stale_counts/
 ls
+cd ..
+ls
+cd ..
+ls
+cd mw-repo-lifecycles
+ls
+cd ..
+ls
+cd govdoc-cr-data
+ls
+cd final_data
+ls
+cd metadata
+ls
diff --git a/govdoc-cr-age-dist.R b/govdoc-cr-age-dist.R
new file mode 100644
--- /dev/null
+++ b/govdoc-cr-age-dist.R
@@ -0,0 +1,64 @@
+# Age-at-commit distributions for CONTRIBUTING and README documents.
+#
+# Reads the two weekly count CSVs, keeps the distinct
+# (project_id, age_at_commit, document) rows, and draws a raincloud-style
+# plot: a narrow box plot with a dot plot to its left, one per document.
+library(dplyr)
+library(tidyverse)
+library(tidyquant)
+library(ggdist)
+library(ggthemes)
+library(ggplot2)
+
+metadata_dir <- file.path(
+  "/mmfs1/gscratch/comdata/users/mjilg",
+  "govdoc-cr-data/final_data/metadata"
+)
+
+contributing_df_filepath <- file.path(
+  metadata_dir, "CONTRIBUTING_weekly_count_data.csv"
+)
+contributing_df <- read.csv(contributing_df_filepath, header = TRUE)
+
+readme_df_filepath <- file.path(metadata_dir, "README_weekly_count_data.csv")
+readme_df <- read.csv(readme_df_filepath, header = TRUE)
+
+# Shared levels so both halves carry the same factor after bind_rows().
+document_levels <- c("CONTRIBUTING", "README")
+
+# No group_by() here: nothing below aggregates per project, and leaving the
+# result grouped would only leak grouping into later steps.
+combined_df <- bind_rows(
+  contributing_df |>
+    select(project_id, age_at_commit) |>
+    mutate(document = factor("CONTRIBUTING", levels = document_levels)),
+  readme_df |>
+    select(project_id, age_at_commit) |>
+    mutate(document = factor("README", levels = document_levels))
+)
+
+# Collapse repeated (project, age, document) rows so each is plotted once.
+unique_combined_df <- combined_df |>
+  distinct(project_id, age_at_commit, document)
+
+age_raincloud <- unique_combined_df |>
+  ggplot(
+    aes(x = factor(document), y = age_at_commit, fill = factor(document))
+  ) +
+  geom_boxplot(
+    width = 0.12,
+    # hide outlier points
+    outlier.color = NA,
+    alpha = 0.5
+  ) +
+  # stat_dots() is exported by ggdist; there is no `ggplot` namespace.
+  ggdist::stat_dots(
+    # plotting on left side
+    side = "left",
+    # adjusting position
+    justification = 1.1,
+    # adjust grouping (binning) of observations
+    binwidth = 0.25
+  )
+
+age_raincloud
diff --git a/mgaughan-rstudio-server_25494157.out b/mgaughan-rstudio-server_25494157.out
new file mode 100644
index 0000000..c8956bc
--- /dev/null
+++ b/mgaughan-rstudio-server_25494157.out
@@ -0,0 +1,17 @@
+1. SSH tunnel from your workstation using the following command:
+
+   ssh -N -L 8787:n3439:46227 mjilg@klone.hyak.uw.edu
+
+   and point your web browser to http://localhost:8787
+
+2. log in to RStudio Server using the following credentials:
+
+   user: mjilg
+   password: AJ9ua2VJPYQLsa6g6Fbq
+
+When done using RStudio Server, terminate the job by:
+
+1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window)
+2. Issue the following command on the login node:
+
+   scancel -f 25494157