diff --git a/.RData b/.RData
index ad0122b..078b83e 100644
Binary files a/.RData and b/.RData differ
diff --git a/mg-govdoc-cr_24074016.out b/mg-govdoc-cr_24085748.out
similarity index 77%
rename from mg-govdoc-cr_24074016.out
rename to mg-govdoc-cr_24085748.out
index e84d9fb..4ae0279 100644
--- a/mg-govdoc-cr_24074016.out
+++ b/mg-govdoc-cr_24085748.out
@@ -1,17 +1,17 @@
 1. SSH tunnel from your workstation using the following command:
 
-   ssh -N -L 8787:n3434:49157 mjilg@klone.hyak.uw.edu
+   ssh -N -L 8787:n3436:56811 mjilg@klone.hyak.uw.edu
 
    and point your web browser to http://localhost:8787
 
 2. log in to RStudio Server using the following credentials:
 
    user: mjilg
-   password: shDpem/m/RHo7HO1CuWG
+   password: +g73U+bdF4uygmNdsKEt
 
 When done using RStudio Server, terminate the job by:
 
 1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window)
 2. Issue the following command on the login node:
 
-      scancel -f 24074016
+      scancel -f 24085748
diff --git a/plots/0207-blup-readability-plot.png b/plots/0207-blup-readability-plot.png
new file mode 100644
index 0000000..413649e
Binary files /dev/null and b/plots/0207-blup-readability-plot.png differ
diff --git a/plots/cr-0207-contributing-blup.png b/plots/cr-0207-contributing-blup.png
new file mode 100644
index 0000000..5cb8a9f
Binary files /dev/null and b/plots/cr-0207-contributing-blup.png differ
diff --git a/plots/cr-0207-wc-density.png b/plots/cr-0207-wc-density.png
new file mode 100644
index 0000000..d8d2a97
Binary files /dev/null and b/plots/cr-0207-wc-density.png differ
diff --git a/plots/mem_presentation.R b/plots/mem_presentation.R
index f9ab6de..e4ea54b 100644
--- a/plots/mem_presentation.R
+++ b/plots/mem_presentation.R
@@ -1,16 +1,16 @@
 library(tidyverse)
 library(texreg)
 
-readme_rdd <- readRDS("mlm/models/020325_readme_model.rda")
-contrib_rdd <- readRDS("mlm/models/020125_contributing_model.rda")
+readme_rdd <- readRDS("mlm/models/020725_readme_model.rda")
+contrib_rdd <- readRDS("mlm/models/020725_contributing_model.rda")
 
 texreg(list(readme_rdd, contrib_rdd), stars=NULL, digits=3, use.packages=FALSE, 
        custom.model.names=c( 'README','CONTRIBUTING'),
        custom.coef.names=c('(Intercept)', 'Indtroduction', 'Week (Time)', 'Project Age', 'Introduction:Week'), 
        table=FALSE, ci.force = TRUE)
 
-readme_groupings <- read.csv('mlm/data/0203_readme_dweek_ranefs.csv')
-contrib_groupings <- read.csv('mlm/data/0201_contributing_dweek_ranefs.csv')
+readme_groupings <- read.csv('mlm/data/0207_readme_dweek_ranefs.csv')
+contrib_groupings <- read.csv('mlm/data/0207_contributing_dweek_ranefs.csv')
 
 subdirColors <-
   setNames( c('#31449c', '#4a7c85', '#c5db68')
@@ -35,7 +35,7 @@ contrib_g <- contrib_groupings |>
   labs(x="RE Coefficient Rank", y="RE Coefficient Estimate", color="Estimate Grouping")
 contrib_g
 
-#ggsave(filename = "plots/cr-0203-contributing-blup.png", plot = contrib_g, width = 9, height = 9, dpi = 800)
+ggsave(filename = "plots/cr-0207-contributing-blup.png", plot = contrib_g, width = 9, height = 9, dpi = 800)
 
 
 texreg(list(readme_commits_, commits_), stars=NULL, digits=3, use.packages=FALSE, 
diff --git a/plots/text_presentation.R b/plots/text_presentation.R
index c213488..17f8a8e 100644
--- a/plots/text_presentation.R
+++ b/plots/text_presentation.R
@@ -1,22 +1,24 @@
 library(tidyverse)
-readme_groupings <- read.csv('text_analysis/0203_readme_merged_manifest.csv')
-contrib_groupings <- read.csv('text_analysis/0203_contributing_merged_manifest.csv')
+readme_groupings <- read.csv('text_analysis/0207_readme_merged_manifest.csv')
+contrib_groupings <- read.csv('text_analysis/0207_contributing_merged_manifest.csv')
 contrib_groupings$filename <- contrib_groupings$fvf_filepath
 readme_groupings$filename <- readme_groupings$fvf_filepath
-readme_textstat <- read.csv('text_analysis/020325_README_readability.csv')
-contributing_textstat <- read.csv('text_analysis/020125_CONTRIBUTING_readability.csv')
+readme_textstat <- read.csv('text_analysis/020725_README_readability.csv')
+contributing_textstat <- read.csv('text_analysis/020725_CONTRIBUTING_readability.csv')
 
 
 doctypeColors <-
-  setNames( c('#5da2d8', '#c7756a')
-            , c("CONTRIBUTING", "README"))
+  setNames( c('#c7756a','#5da2d8')
+            , c("README","CONTRIBUTING"))
 readme_textstat$type = "README"
 contributing_textstat$type = "CONTRIBUTING"
-all_df = rbind(readme_textstat, contributing_textstat)
+all_df = rbind(contributing_textstat,readme_textstat)
+all_df$type <- factor(all_df$type, levels = c("CONTRIBUTING", "README"))
+
 length_plot_all <- ggplot(all_df, aes(x=word_count, group=as.factor(type))) + 
   geom_density(aes(fill = as.factor(type)), color = NA, alpha=0.6, position="identity")+
   scale_fill_manual(values = doctypeColors) + 
-  xlim(-10, 500) +
+  xlim(-10, 600) +
   labs(
     x = "Word Count",
     y = "Density Across Documents",
@@ -26,10 +28,10 @@ length_plot_all <- ggplot(all_df, aes(x=word_count, group=as.factor(type))) +
   theme(legend.position = "top") 
 length_plot_all
 
-#ggsave(filename = "plots/cr-0203-wc-density.png", plot = length_plot_all, width = 9, height = 9, dpi = 800)
+#ggsave(filename = "plots/cr-0207-wc-density.png", plot = length_plot_all, width = 9, height = 9, dpi = 800)
 
-contributing_df <- inner_join(contributing_textstat, contrib_groupings, by="filename")
-readme_df <- inner_join(readme_textstat, readme_groupings, by="filename")
+contributing_df <- inner_join(contributing_textstat, contrib_groupings, by=c("filename"="new_filepath"))
+readme_df <- inner_join(readme_textstat, readme_groupings, by=c("filename"="new_filepath"))
 
 subdirColors <-
   setNames( c('#31449c', '#4a7c85', '#c5db68')
@@ -42,7 +44,7 @@ contributing_reading_time_plot <- ggplot(contributing_df, aes(x=reading_time, gr
   labs(x= NULL, y= NULL, fill="RE Grouping")+ 
   theme_bw() +
   theme(legend.position = "inside", 
-        legend.position.inside = c(.90, .90),
+        legend.position.inside = c(.89, .92),
         legend.justification = c("right", "top"),
         legend.direction = "horizontal",
         legend.margin = margin(6, 6, 6, 6))
@@ -64,7 +66,7 @@ readme_reading_time_plot <- ggplot(readme_df, aes(x=reading_time, group=as.facto
   labs(x= "Reading Time (s)", y= NULL)+ 
   guides(fill="none", color="none")+
   theme_bw() 
-#readme_reading_time_plot
+readme_reading_time_plot
 
 readme_reading_ease <- ggplot(readme_df, aes(x=flesch_reading_ease, group=as.factor(ranef_grouping))) + 
   geom_density(aes(fill=as.factor(ranef_grouping)), position="fill") +
@@ -73,7 +75,7 @@ readme_reading_ease <- ggplot(readme_df, aes(x=flesch_reading_ease, group=as.fac
   labs(x= "Flesch Reading Ease", y= "README Density")+ 
   guides(fill="none", color="none")+
   theme_bw() 
-#readme_reading_ease 
+readme_reading_ease 
 library(gridExtra)
 grid.arrange(contributing_reading_ease, contributing_reading_time_plot, readme_reading_ease, readme_reading_time_plot, nrow = 2)
 
diff --git a/topic-outcome-models/020325_CONTRIBUTING_commit_topic_model.rda b/topic-outcome-models/020325_CONTRIBUTING_commit_topic_model.rda
deleted file mode 100644
index 123d23a..0000000
Binary files a/topic-outcome-models/020325_CONTRIBUTING_commit_topic_model.rda and /dev/null differ
diff --git a/topic-outcome-models/020325_README_commit_topic_model.rda b/topic-outcome-models/020325_README_commit_topic_model.rda
deleted file mode 100644
index dd3e05f..0000000
Binary files a/topic-outcome-models/020325_README_commit_topic_model.rda and /dev/null differ
diff --git a/topic-outcome-models/020725_CONTRIBUTING_commit_topic_model.rda b/topic-outcome-models/020725_CONTRIBUTING_commit_topic_model.rda
new file mode 100644
index 0000000..caee617
Binary files /dev/null and b/topic-outcome-models/020725_CONTRIBUTING_commit_topic_model.rda differ
diff --git a/topic-outcome-models/020725_README_commit_topic_model.rda b/topic-outcome-models/020725_README_commit_topic_model.rda
new file mode 100644
index 0000000..bc1415c
Binary files /dev/null and b/topic-outcome-models/020725_README_commit_topic_model.rda differ
diff --git a/topic-outcome-models/contributing_topic_outcome_model.R b/topic-outcome-models/contributing_topic_outcome_model.R
index 822c843..720a3f5 100644
--- a/topic-outcome-models/contributing_topic_outcome_model.R
+++ b/topic-outcome-models/contributing_topic_outcome_model.R
@@ -3,15 +3,20 @@ library(lubridate)
 library(rdd)
 library(stringr)
 
-contributing_count_data_filepath <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/CONTRIBUTING_weekly_count_data.csv"
+contributing_count_data_filepath <-  "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/metadata/final_0207_CONTRIBUTING_weekly_count_data.csv"
 contributing_count_df = read.csv(contributing_count_data_filepath, header = TRUE) 
 
-contributing_topic_dist_filepath <- "text_analysis/020125_CONTRIBUTING_file_topic_distributions.csv"
+contributing_topic_dist_filepath <- "text_analysis/020725_CONTRIBUTING_file_topic_distributions.csv"
 contributing_topics_df = read.csv(contributing_topic_dist_filepath, header = TRUE) 
 
+contributing_merged_manifest <- "text_analysis/0207_contributing_merged_manifest.csv"
+contributing_manifest_df <- read.csv(contributing_merged_manifest, header=TRUE)
+
+merged_df <- inner_join(contributing_manifest_df, contributing_topics_df, by=c("new_filepath"= "filename"))
+
 window_num <- 5
 contributing_count_df <- contributing_count_df |>
-  filter(week_index >= (- window_num) & week_index <= (window_num)) |>
+  filter(relative_week >= (- window_num) & relative_week <= (window_num)) |>
   mutate(scaled_age = scale(age)) |>
   mutate(scaled_age_at_commit = scale(age_at_commit))|>
   mutate(log1p_count = log1p(commit_count))
@@ -21,12 +26,7 @@ summed_data <- contributing_count_df |>
   group_by(project_id) |>
   summarise_at(vars(commit_count), list(summed_count=sum))
 
-contributing_topics_df <- contributing_topics_df |>
-  mutate(project_id = sapply(str_split(filename, "_hullabaloo_"), `[`, 1)) |>
-  mutate(project_id = ifelse(filename=="_vcr_vcr_CONTRIBUTING.md", "vcr_vcr", project_id)) |>
-  mutate(project_id = ifelse(filename=="marshmallow-code_marshmallow.git_CONTRIBUTING.rst", "marshmallow-code_marshmallow.git", project_id))
-
-merged_df <- inner_join(summed_data, contributing_topics_df, by="project_id")
+merged_df <- inner_join(summed_data, merged_df, by=c("project_id" = "repo_id"))
 merged_df$logged_commits <- log1p(merged_df$summed_count)
   
 library(MASS)
@@ -34,4 +34,5 @@ library(MASS)
 commit_outcome_model <- glm.nb(logged_commits ~ 0 + t0 + t1 + t2 + t3 + t4, data=merged_df)
 qqnorm(residuals(commit_outcome_model))
 summary(commit_outcome_model)
-saveRDS(commit_outcome_model, "020325_commit_topic_model.rda")
+# Name the output after the document type so it matches the README script's "020725_README_..." file.
+saveRDS(commit_outcome_model, "020725_CONTRIBUTING_commit_topic_model.rda")
diff --git a/topic-outcome-models/readme_topic_outcome_model.R b/topic-outcome-models/readme_topic_outcome_model.R
index 6b4100c..3551d08 100644
--- a/topic-outcome-models/readme_topic_outcome_model.R
+++ b/topic-outcome-models/readme_topic_outcome_model.R
@@ -3,15 +3,20 @@ library(lubridate)
 library(rdd)
 library(stringr)
 
-readme_count_data_filepath <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/README_weekly_count_data.csv"
+readme_count_data_filepath <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/metadata/final_0207_README_weekly_count_data.csv"
 readme_count_df = read.csv(readme_count_data_filepath, header = TRUE) 
 
-readme_topic_dist_filepath <- "text_analysis/020325_README_file_topic_distributions.csv"
+readme_topic_dist_filepath <- "text_analysis/020725_README_file_topic_distributions.csv"
 readme_topics_df = read.csv(readme_topic_dist_filepath, header = TRUE) 
 
+readme_merged_manifest <- "text_analysis/0207_readme_merged_manifest.csv"
+readme_manifest_df <- read.csv(readme_merged_manifest, header=TRUE)
+
+merged_df <- inner_join(readme_manifest_df, readme_topics_df, by=c("new_filepath"= "filename"))
+
 window_num <- 5
 readme_count_df <- readme_count_df |>
-  filter(week_index >= (- window_num) & week_index <= (window_num)) |>
+  filter(relative_week >= (- window_num) & relative_week <= (window_num)) |>
   mutate(scaled_age = scale(age)) |>
   mutate(scaled_age_at_commit = scale(age_at_commit))|>
   mutate(log1p_count = log1p(commit_count))
@@ -21,19 +26,15 @@ summed_data <- readme_count_df |>
   group_by(project_id) |>
   summarise_at(vars(commit_count), list(summed_count=sum))
 
-readme_topics_df <- readme_topics_df |>
-  mutate(project_id = sapply(str_split(filename, "_hullabaloo_"), `[`, 1)) |>
-  mutate(project_id = ifelse(filename=="jaraco_keyrings.alt_hullabaloo_README.rst", "jaraco_keyrings.alt", project_id)) |>
-  mutate(project_id = ifelse(filename=="_vcr_vcr_README.md", "vcr_vcr", project_id)) 
-  
+
 #loss of jaraco_keyring, though jaraco keyrings.alt is represented
-merged_df <- inner_join(summed_data, readme_topics_df, by="project_id")
+merged_df <- inner_join(summed_data, merged_df, by=c("project_id" = "repo_id"))
 
 merged_df$logged_commits <- log1p(merged_df$summed_count)
 
 library(MASS)
-commit_outcome_model <- glm.nb(logged_commits ~ 0 + t0 + t1 + t2 + t3 + t4 + t5 + t6 + t7 + t8 + t9 + t10, data=merged_df)
+commit_outcome_model <- glm.nb(logged_commits ~ 0 + t0 + t1 + t2 + t3 + t4 + t5 + t6 + t7 + t8, data=merged_df)
 qqnorm(residuals(commit_outcome_model))
 summary(commit_outcome_model)
 
-saveRDS(commit_outcome_model, "020325_CONTRIBUTING_commit_topic_model.rda")
+saveRDS(commit_outcome_model, "020725_README_commit_topic_model.rda")