1
0
govdoc-cr-analysis/topic-outcome-models/readme_topic_outcome_model.R

41 lines
1.5 KiB
R
Raw Normal View History

2025-02-03 22:21:27 +00:00
library(dplyr)
library(lubridate)
library(rdd)
library(stringr)
2025-02-08 00:59:56 +00:00
# Input locations ----
# Weekly README commit-count table (absolute path).
readme_count_data_filepath <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/metadata/final_0207_README_weekly_count_data.csv"
# Topic-distribution table and merged manifest (relative paths, resolved
# against the current working directory).
readme_topic_dist_filepath <- "text_analysis/020725_README_file_topic_distributions.csv"
readme_merged_manifest <- "text_analysis/0207_readme_merged_manifest.csv"

# Read inputs ----
readme_count_df <- read.csv(readme_count_data_filepath, header = TRUE)
readme_topics_df <- read.csv(readme_topic_dist_filepath, header = TRUE)
readme_manifest_df <- read.csv(readme_merged_manifest, header = TRUE)

# Keep only manifest rows that have a matching topic row: the manifest's
# `new_filepath` is matched against the topic table's `filename`.
merged_df <- readme_manifest_df |>
  inner_join(readme_topics_df, by = c("new_filepath" = "filename"))
2025-02-03 22:21:27 +00:00
# Keep only rows whose `relative_week` lies within `window_num` weeks on
# either side of week 0 (bounds inclusive), then add derived columns.
window_num <- 5
readme_count_df <- readme_count_df |>
  filter(
    relative_week >= -window_num,
    relative_week <= window_num
  ) |>
  mutate(
    # scale() centers and scales using the rows that survived the filter
    # above; it returns a one-column matrix, not a plain vector.
    scaled_age = scale(age),
    scaled_age_at_commit = scale(age_at_commit),
    # log(1 + x), so zero-commit weeks stay finite.
    log1p_count = log1p(commit_count)
  )
# Total `commit_count` per project over the rows flagged `before_after == 1`
# (presumably the post-event weeks -- confirm against the data dictionary).
# Result: one row per `project_id` with the column `summed_count`.
#
# Uses summarise() directly instead of the superseded scoped verb
# summarise_at(); the output column name and values are unchanged.
summed_data <- readme_count_df |>
  filter(before_after == 1) |>
  group_by(project_id) |>
  summarise(summed_count = sum(commit_count), .groups = "drop")
2025-02-08 00:59:56 +00:00
2025-02-04 01:19:01 +00:00
# Attach the per-project commit totals to the manifest/topic rows
# (`project_id` in the totals is matched to `repo_id` in `merged_df`), then
# add log(1 + total commits) as `logged_commits`.
# The inner join drops unmatched projects: jaraco_keyring is lost here,
# though jaraco keyrings.alt is still represented.
merged_df <- summed_data |>
  inner_join(merged_df, by = c("project_id" = "repo_id")) |>
  mutate(logged_commits = log1p(summed_count))
2025-02-04 01:19:01 +00:00
# MASS provides glm.nb(). NOTE: attaching MASS after dplyr masks
# dplyr::select(); call dplyr::select() explicitly in any code added below.
library(MASS)

# Negative binomial regression of the per-project commit total on the columns
# t0..t8 (presumably the topic weights from the topic-distribution table --
# confirm), with the intercept removed (`0 +`).
#
# The response is the raw count `summed_count`, not `logged_commits`: the
# negative binomial is a distribution over non-negative integers and glm.nb()
# already models the mean through a log link. Fitting it to log1p() values
# feeds it non-integer responses, for which dnbinom() warns and returns zero
# density, so the reported AIC is not meaningful.
# NOTE(review): this changes the fitted coefficients relative to earlier runs;
# if a model on the log scale is what is wanted, use lm(logged_commits ~ ...)
# instead of glm.nb().
commit_outcome_model <- glm.nb(
  summed_count ~ 0 + t0 + t1 + t2 + t3 + t4 + t5 + t6 + t7 + t8,
  data = merged_df
)

# Diagnostics: normal Q-Q plot of the model residuals (deviance residuals by
# default for a glm fit) and the coefficient table.
qqnorm(residuals(commit_outcome_model))
summary(commit_outcome_model)

# Written with saveRDS(), so it must be read back with readRDS() (not load())
# despite the ".rda" extension; the file name is kept for downstream readers.
saveRDS(commit_outcome_model, "020725_README_commit_topic_model.rda")