2025-02-04 04:50:34 +00:00
|
|
|
library(dplyr)
|
2025-02-02 20:16:42 +00:00
|
|
|
# Weekly count data for CONTRIBUTING files (per the file name); the first row
# of the CSV is read as the column header.
contributing_df_filepath <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/CONTRIBUTING_weekly_count_data.csv"
contributing_df <- read.csv(file = contributing_df_filepath, header = TRUE)
|
|
|
|
|
|
|
|
# Weekly count data for README files (per the file name); the first row of the
# CSV is read as the column header.
readme_df_filepath <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/README_weekly_count_data.csv"
readme_df <- read.csv(file = readme_df_filepath, header = TRUE)
|
|
|
|
|
|
|
|
# Half-width of the analysis window: rows are kept when their week_index lies
# in [-window_num, window_num] (both ends inclusive).
window_num <- 5

# Restrict the CONTRIBUTING data to the window and label every remaining row
# with its document type so the two data sets can be stacked later.
contributing_df <- contributing_df |>
  filter(week_index >= -window_num, week_index <= window_num) |>
  mutate(doc_type = "CONTRIBUTING")
|
|
|
|
|
|
|
|
# Restrict the README data to week_index values in [-window_num, window_num]
# (inclusive) and label every remaining row with its document type.
readme_df <- readme_df |>
  filter(week_index >= -window_num, week_index <= window_num) |>
  mutate(doc_type = "README")
|
|
|
|
|
|
|
|
# Stack the two labelled data sets row-wise and add log1p_count, i.e.
# log(1 + commit_count), as an extra column.
main_df <- rbind(contributing_df, readme_df) |>
  mutate(log1p_count = log1p(commit_count))
|
|
|
|
|
|
|
|
library(scales)
|
|
|
|
library(ggplot2)
|
|
|
|
|
|
|
|
# Custom scales transformation object named "expm1": values are mapped forward
# with expm1() and mapped back with log1p().
# NOTE(review): nothing in the visible script references expm1_trans; the plot
# selects the 'log1p' transform by name instead. Confirm whether this object is
# still needed.
expm1_trans <- trans_new(
  name = "expm1",
  transform = \(x) expm1(x),
  inverse = \(x) log1p(x)
)
|
|
|
|
|
|
|
|
# Named colour palette: one hex colour per document type, keyed by the
# doc_type label so scale_color_manual() can match colours by name.
doctypeColors <- c(
  CONTRIBUTING = "#5da2d8",
  README = "#c7756a"
)
|
|
|
|
|
|
|
|
# Smoothed commit counts against week_index, one curve per document type, with
# a vertical reference line at week_index 0, a log1p-scaled y axis with
# comma-formatted labels, and the legend placed above the panel.
# NOTE(review): geom_smooth() is left on its defaults, so ggplot2 picks the
# smoothing method itself; the output file name mentions "gam" - confirm that
# this is the method actually being fitted.
time_plot <- ggplot(
  main_df,
  aes(x = week_index, y = commit_count, color = factor(doc_type))
) +
  geom_smooth() +
  geom_vline(xintercept = 0) +
  scale_y_continuous(trans = "log1p", labels = scales::comma) +
  scale_color_manual(values = doctypeColors) +
  labs(x = "Weekly Offset", y = "Commit Count", color = "Document Type: ") +
  theme_bw() +
  theme(legend.position = "top")

# Bare top-level expression: relies on auto-printing to display the plot.
time_plot
|
|
|
|
|
2025-02-04 04:50:34 +00:00
|
|
|
# Write the plot to disk as a 9 x 9 PNG at 800 dpi (ggsave's default units).
# The target is a relative path, so the output directory is created first if it
# is missing; otherwise the save fails on a fresh checkout or in a
# non-interactive run. dir.create() is a silent no-op when it already exists.
plot_filepath <- "plots/cr-020325-gam-introduction.png"
dir.create(dirname(plot_filepath), showWarnings = FALSE, recursive = TRUE)
ggsave(filename = plot_filepath, plot = time_plot, width = 9, height = 9, dpi = 800)
|