1
0
govdoc-cr-analysis/mlm/gam_plot.R

45 lines
1.5 KiB
R
Raw Normal View History

2025-02-02 20:16:42 +00:00
# Load the weekly commit-count tables for the two document types, keep only
# the rows whose `week_index` lies within `window_num` weeks of 0 (inclusive),
# tag each table with its document type, and stack them into `main_df`.
#
# The windowing/tagging is done with base R on purpose: no package providing
# `filter()`/`mutate()` is attached in this script, so in a fresh session
# `filter` would resolve to `stats::filter()` and the pipeline would fail.

# Keep rows of `df` with -window <= week_index <= window and add a constant
# `doc_type` column. Rows with a missing week_index are dropped.
window_and_tag <- function(df, doc_type, window) {
  # Fail loudly instead of silently returning zero rows when the column
  # is absent (df$week_index would be NULL).
  stopifnot("week_index" %in% names(df))
  in_window <- !is.na(df$week_index) &
    df$week_index >= -window &
    df$week_index <= window
  df <- df[in_window, , drop = FALSE]
  rownames(df) <- NULL
  # rep() keeps the assignment valid even when no rows survive the filter.
  df$doc_type <- rep(doc_type, nrow(df))
  df
}

contributing_df_filepath <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/CONTRIBUTING_weekly_count_data.csv"
contributing_df <- read.csv(contributing_df_filepath, header = TRUE)
readme_df_filepath <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/README_weekly_count_data.csv"
readme_df <- read.csv(readme_df_filepath, header = TRUE)

# Number of weeks kept on each side of week_index 0.
window_num <- 5

contributing_df <- window_and_tag(contributing_df, "CONTRIBUTING", window_num)
readme_df <- window_and_tag(readme_df, "README", window_num)

# rbind() requires both tables to share the same set of columns.
main_df <- rbind(contributing_df, readme_df)
main_df$log1p_count <- log1p(main_df$commit_count)
library(scales)
library(ggplot2)
# scales transformation object named "expm1": values are mapped forward with
# expm1() and mapped back with log1p().
# NOTE(review): the plot code in this file selects its y transformation by
# name (trans = 'log1p') and does not reference this object -- confirm
# whether it is still needed.
expm1_trans <- trans_new(
  "expm1",
  transform = expm1,
  inverse = log1p
)
# Colour assigned to each document type, keyed by the `doc_type` value so the
# mapping does not depend on factor level order.
doctypeColors <- c(
  CONTRIBUTING = "#5da2d8",
  README = "#c7756a"
)
# Smoothed commit count against weekly offset, one curve per document type.
# The y axis is drawn on a log1p scale with comma-formatted labels, and a
# vertical reference line marks week_index 0.
time_plot <- ggplot(
  main_df,
  aes(x = week_index, y = commit_count, color = factor(doc_type))
) +
  # No method is passed, so the smoother is whatever geom_smooth() defaults to.
  geom_smooth() +
  geom_vline(xintercept = 0) +
  scale_color_manual(values = doctypeColors) +
  scale_y_continuous(trans = "log1p", labels = scales::comma) +
  labs(x = "Weekly Offset", y = "Commit Count", color = "Document Type: ") +
  # theme_bw() has to come before theme() so the legend override is kept.
  theme_bw() +
  theme(legend.position = "top")

# Evaluating the object at top level renders the plot.
time_plot
#ggsave(filename = "plots/cr-020225-gam-introduction.png", plot = time_plot, width = 8, height = 6, dpi = 700)