caught duplicates in documents, re-running README topic model
commit 5ab4b58542 (parent f52ee087b7)
020325_readme_model.rda (new binary file; contents not shown)
0203_readme_dweek_ranefs.csv (new file, +4248 lines; diff suppressed because it is too large)
mg-govdoc-cr_24004290.out (new file, +17 lines)
@@ -0,0 +1,17 @@
+1. SSH tunnel from your workstation using the following command:
+
+    ssh -N -L 8787:n3434:42289 mjilg@klone.hyak.uw.edu
+
+    and point your web browser to http://localhost:8787
+
+2. log in to RStudio Server using the following credentials:
+
+    user: mjilg
+    password: K/bHQjx0xRAp26CGpsXM
+
+When done using RStudio Server, terminate the job by:
+
+1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window)
+2. Issue the following command on the login node:
+
+    scancel -f 24004290
@@ -22,7 +22,7 @@ all_gmodel <- glmer.nb(log1p_count ~ before_after * week_index + scaled_age + (b
                       data=readme_df)
 
 summary(all_gmodel)
-#saveRDS(all_gmodel, "020125_readme_model.rda")
+#saveRDS(all_gmodel, "020325_readme_model.rda")
 
 model_residuals <- residuals(all_gmodel)
 acf(model_residuals)
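
Since the fitted model object is committed as 020325_readme_model.rda, later sessions can presumably reload it rather than refit; a minimal sketch, assuming the file was written by the saveRDS() call shown (commented out) above:

    library(lme4)

    # Assumes 020325_readme_model.rda was produced by saveRDS(all_gmodel, ...).
    all_gmodel <- readRDS("020325_readme_model.rda")

    # Re-inspect the fit and residual autocorrelation without refitting.
    summary(all_gmodel)
    acf(residuals(all_gmodel))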
@@ -35,7 +35,7 @@ variance_components <- as.data.frame(VarCorr(all_gmodel))
 library(broom.mixed)
 library(ggplot2)
 condvals <- broom.mixed::tidy(all_gmodel, effects = "ran_vals", conf.int = TRUE)
-glmer_ranef_Dweek <- test_condvals[which(test_condvals$term == "before_after:week_index"),]
+glmer_ranef_Dweek <- condvals[which(condvals$term == "before_after:week_index"),]
 has_zero <- function(estimate, low, high){
   return(ifelse((low < 0),ifelse((high > 0), 1, 0), 2))
 }
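
The hunk defines has_zero() but its application falls outside the diff context; a sketch of the presumable downstream use, assuming it is applied row-wise to the estimate, conf.low, and conf.high columns that broom.mixed::tidy() returns when conf.int = TRUE:

    # Hypothetical usage (not shown in this hunk): flag whether each group's
    # conditional deviation for before_after:week_index crosses zero.
    glmer_ranef_Dweek$crosses_zero <- mapply(
      has_zero,
      glmer_ranef_Dweek$estimate,
      glmer_ranef_Dweek$conf.low,
      glmer_ranef_Dweek$conf.high
    )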
@@ -48,4 +48,4 @@ g <- glmer_ranef_Dweek |>
   theme_bw()
 g
 
-write.csv(glmer_ranef_Dweek, "0201_readme_dweek_ranefs.csv")
+write.csv(glmer_ranef_Dweek, "0203_readme_dweek_ranefs.csv")
@@ -26,5 +26,14 @@ readme_topics_df <- readme_topics_df |>
   mutate(project_id = ifelse(filename=="jaraco_keyrings.alt_hullabaloo_README.rst", "jaraco_keyrings.alt", project_id)) |>
   mutate(project_id = ifelse(filename=="_vcr_vcr_README.md", "vcr_vcr", project_id))
 
+filtered_topics <- readme_topics_df |>
+  filter(project_id %in% summed_data$project_id)
+
 merged_df <- inner_join(summed_data, readme_topics_df, by="project_id")
+
+multiple_mappings <- merged_df %>%
+  group_by(project_id) %>%
+  filter(n() > 1) %>%
+  ungroup()
+
 merged_df$logged_commits <- log1p(merged_df$summed_count)
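
Given the commit message about catching duplicate documents, a minimal sketch of how the multiple_mappings check could feed a deduplication step before re-running the topic model; the dedup_df name and the distinct() call are illustrative and not part of the committed script:

    library(dplyr)

    # Inspect project_ids that map to more than one README row.
    print(multiple_mappings)

    # Illustrative fix: keep a single row per project_id before modeling.
    dedup_df <- merged_df %>%
      distinct(project_id, .keep_all = TRUE)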