1
0
mw-lifecycle-analysis/govdoc-cr-age-dist.R
2025-04-21 11:52:30 -07:00

46 lines
1.3 KiB
R

library(dplyr)
# Both weekly-count exports sit in the same metadata directory, so the
# directory is spelled out once and each file path is built from it.
metadata_dir <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/metadata"
contributing_df_filepath <- file.path(
  metadata_dir, "CONTRIBUTING_weekly_count_data.csv"
)
readme_df_filepath <- file.path(
  metadata_dir, "README_weekly_count_data.csv"
)

# Load each export as a data frame; the first row of each CSV is read as the
# column names. The rest of the script uses the project_id and age_at_commit
# columns of these tables.
contributing_df <- read.csv(contributing_df_filepath, header = TRUE)
readme_df <- read.csv(readme_df_filepath, header = TRUE)
# Level set shared by both halves so `document` binds into a single factor
# column with a fixed CONTRIBUTING-then-README ordering.
document_levels <- c("CONTRIBUTING", "README")

# Keep the two analysis columns of `df` and tag every row with the document
# type `label`.
#
# No group_by() is applied here: nothing in this script computes a per-project
# summary, and a grouping added at this point would stay attached to
# combined_df and unique_combined_df because it is never removed with
# ungroup(). as_tibble() keeps the results as tibbles.
tag_document <- function(df, label) {
  df |>
    as_tibble() |>
    select(project_id, age_at_commit) |>
    mutate(document = factor(label, levels = document_levels))
}

# Long table of commit ages: CONTRIBUTING rows first, then README rows.
combined_df <- bind_rows(
  tag_document(contributing_df, "CONTRIBUTING"),
  tag_document(readme_df, "README")
)

# Collapse repeated (project_id, age_at_commit, document) combinations so each
# one contributes a single observation downstream.
unique_combined_df <- combined_df |>
  distinct(project_id, age_at_commit, document)
library(tidyverse)
library(tidyquant)
library(ggdist)
library(ggthemes)
library(ggplot2)
# Raincloud-style comparison of age_at_commit between the two document types:
# a narrow box plot per document with a dot plot of the individual
# observations drawn beside it.
age_raincloud <- unique_combined_df |>
  ggplot(
    aes(x = factor(document), y = age_at_commit, fill = factor(document))
  ) +
  geom_boxplot(
    width = 0.12,
    # hide the outlier points; the dots layer already shows every observation
    outlier.color = NA,
    alpha = 0.5
  ) +
  # stat_dots() is provided by ggdist (not by ggplot2), so it is qualified
  # with the ggdist namespace.
  ggdist::stat_dots(
    # plotting on the left side of each box
    side = "left",
    # shift the dots away from the box so the two layers do not overlap
    justification = 1.1,
    # bin width used to group observations into dots
    binwidth = 0.25
  )

# Evaluating the plot object at top level renders it.
age_raincloud