2025-02-03 22:21:27 +00:00
|
|
|
library(dplyr)
|
|
|
|
library(lubridate)
|
|
|
|
library(rdd)
|
|
|
|
library(stringr)
|
|
|
|
|
|
|
|
# Load the README weekly count data (CSV with a header row).
# NOTE(review): this is an absolute path on a specific filesystem; the script
# will only run where that mount exists.
readme_count_data_filepath <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/README_weekly_count_data.csv"
readme_count_df <- read.csv(readme_count_data_filepath, header = TRUE)
|
|
|
|
|
2025-02-04 01:19:01 +00:00
|
|
|
# Load the per-file README topic distributions (CSV with a header row).
# NOTE(review): the path is relative, so it resolves against the working
# directory the script is launched from.
readme_topic_dist_filepath <- "text_analysis/020325_README_file_topic_distributions.csv"
readme_topics_df <- read.csv(readme_topic_dist_filepath, header = TRUE)
|
|
|
|
|
|
|
|
# Half-width of the analysis window, in units of week_index.
window_num <- 5

# Restrict the count data to rows whose week_index falls inside
# [-window_num, window_num], then derive the modelling covariates:
#   * scaled_age / scaled_age_at_commit: centred and scaled copies of age and
#     age_at_commit (scale() returns a one-column matrix, so these are matrix
#     columns),
#   * log1p_count: log(1 + commit_count).
readme_count_df <- readme_count_df |>
  filter(
    week_index >= (-window_num),
    week_index <= window_num
  ) |>
  mutate(
    scaled_age = scale(age),
    scaled_age_at_commit = scale(age_at_commit),
    log1p_count = log1p(commit_count)
  )
|
|
|
|
|
|
|
|
# Total commit_count per project over the rows flagged before_after == 1.
# The result has one row per project_id with the total in `summed_count`.
# Plain summarise() replaces the superseded summarise_at(); with a single
# grouping variable the result comes back ungrouped, as before.
summed_data <- readme_count_df |>
  filter(before_after == 1) |>
  group_by(project_id) |>
  summarise(summed_count = sum(commit_count))
|
|
|
|
|
|
|
|
# Derive project_id from each topic row's filename.
# The default rule takes everything before the first "_hullabaloo_" separator.
# vapply() (rather than sapply()) guarantees a character vector even when the
# data frame has zero rows.
# Two filenames are then overridden explicitly; "_vcr_vcr_README.md" contains
# no "_hullabaloo_" separator, so the default rule would otherwise return the
# whole filename for it.
readme_topics_df <- readme_topics_df |>
  mutate(
    project_id = vapply(
      str_split(filename, "_hullabaloo_"),
      `[`,
      character(1),
      1
    ),
    project_id = case_when(
      filename == "jaraco_keyrings.alt_hullabaloo_README.rst" ~ "jaraco_keyrings.alt",
      filename == "_vcr_vcr_README.md" ~ "vcr_vcr",
      TRUE ~ project_id
    )
  )
|
|
|
|
|
2025-02-04 01:19:01 +00:00
|
|
|
# Note: the jaraco_keyring project is lost, though jaraco_keyrings.alt is still represented.
|
2025-02-03 22:21:27 +00:00
|
|
|
# Attach the topic columns to the per-project commit totals. The inner join
# keeps only project_ids present in both tables. logged_commits is
# log(1 + summed_count), appended as the last column.
merged_df <- summed_data |>
  inner_join(readme_topics_df, by = "project_id") |>
  mutate(logged_commits = log1p(summed_count))
|
2025-02-04 01:19:01 +00:00
|
|
|
|
|
|
|
library(MASS)
|
|
|
|
# Fit a negative-binomial GLM of logged_commits on the columns t0..t10 with no
# intercept (the `0 +` term). t0..t10 are presumably the topic weights brought
# in from readme_topics_df -- TODO confirm.
# NOTE(review): logged_commits is log1p(summed_count), i.e. generally
# non-integer, while a negative-binomial model is defined for counts. Consider
# modelling summed_count directly; left unchanged here because it alters the
# reported estimates.
commit_outcome_model <- glm.nb(
  logged_commits ~ 0 + t0 + t1 + t2 + t3 + t4 + t5 + t6 + t7 + t8 + t9 + t10,
  data = merged_df
)

# Diagnostics: normal Q-Q plot of the model residuals, then the fit summary.
qqnorm(residuals(commit_outcome_model))
summary(commit_outcome_model)

# Persist the fitted model. Every input to this script is README data, so the
# output is named README; the previous "CONTRIBUTING" file name would overwrite
# the CONTRIBUTING model.
# NOTE(review): saveRDS() output conventionally uses ".rds"; the ".rda"
# extension is kept so existing readers of this naming scheme are unaffected.
saveRDS(commit_outcome_model, "020325_README_commit_topic_model.rda")
|