diff --git a/073125-fossy-tasks-resolved.png b/artifact-figures/073125-fossy-tasks-resolved.png
similarity index 100%
rename from 073125-fossy-tasks-resolved.png
rename to artifact-figures/073125-fossy-tasks-resolved.png
diff --git a/073125_FOSSY_comm_heatmap.png b/artifact-figures/073125_FOSSY_comm_heatmap.png
similarity index 100%
rename from 073125_FOSSY_comm_heatmap.png
rename to artifact-figures/073125_FOSSY_comm_heatmap.png
diff --git a/affiliation_heatmap_fossy_plot.png b/artifact-figures/affiliation_heatmap_fossy_plot.png
similarity index 100%
rename from affiliation_heatmap_fossy_plot.png
rename to artifact-figures/affiliation_heatmap_fossy_plot.png
diff --git a/mgaughan-rstudio-server_27851458.out b/mgaughan-rstudio-server_27851458.out
deleted file mode 100644
index 79aae66..0000000
--- a/mgaughan-rstudio-server_27851458.out
+++ /dev/null
@@ -1,17 +0,0 @@
-1. SSH tunnel from your workstation using the following command:
-
-   ssh -N -L 8787:n3441:59491 mjilg@klone.hyak.uw.edu
-
-   and point your web browser to http://localhost:8787
-
-2. log in to RStudio Server using the following credentials:
-
-   user: mjilg
-   password: QSTMw7+SdHBKq8hU9/1q
-
-When done using RStudio Server, terminate the job by:
-
-1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window)
-2. Issue the following command on the login node:
-
-      scancel -f 27851458
diff --git a/p2/gerrit_script.R b/p2/gerrit_script.R
new file mode 100644
index 0000000..4d19aa2
--- /dev/null
+++ b/p2/gerrit_script.R
@@ -0,0 +1,14 @@
+# Flag Gerrit-linked messages whose fetched change results look incomplete.
+library(tidyverse) # attaches dplyr and stringr; no separate library() calls
+gerrit_csv <- "/gscratch/comdata/users/mjilg/mw-repo-lifecycles/080425_gerrit_filled_df.csv"
+gerrit_df <- read.csv(gerrit_csv, header = TRUE)
+
+# Keep rows whose gerrit_change_urls is not the literal string "[]".
+messages_with_urls <- gerrit_df |>
+  dplyr::filter(gerrit_change_urls != "[]")
+# Rows whose gerrit_full_results contains the text 'full_result': None}.
+incomplete_data <- messages_with_urls |>
+  dplyr::filter(stringr::str_detect(gerrit_full_results, "'full_result': None\\}"))
+# Looser check: rows with "None" anywhere in gerrit_full_results.
+maybe_incomplete_data <- messages_with_urls |>
+  dplyr::filter(stringr::str_detect(gerrit_full_results, "None"))
diff --git a/p2/p2_EDA/080425_population_EDA.R b/p2/p2_EDA/080425_population_EDA.R
new file mode 100644
index 0000000..be5925a
--- /dev/null
+++ b/p2/p2_EDA/080425_population_EDA.R
@@ -0,0 +1,12 @@
+library(tidyverse)
+# Tabulate AuthorWMFAffil across distinct authors (first row per AuthorPHID).
+neurobiber_csv <- "~/p2/071425_master_discussion_data.csv"
+neurobiber_df <- read.csv(file = neurobiber_csv, header = TRUE)
+unique_authors <- unique(neurobiber_df[["AuthorPHID"]])
+unique_authors_df <- neurobiber_df[!duplicated(neurobiber_df[["AuthorPHID"]]), ]
+table(unique_authors_df[["AuthorWMFAffil"]])
+# NOTE(review): the filter below sees only each author's first row, so authors
+# whose first row is not a task_description are excluded -- confirm intent.
+task_description_unique_authors <-
+  dplyr::filter(unique_authors_df, comment_type == "task_description")
+table(task_description_unique_authors[["AuthorWMFAffil"]])
diff --git a/p2/quest/neurobiber_cosine.R b/p2/quest/neurobiber_cosine.R
index 15cb004..ff7badf 100644
--- a/p2/quest/neurobiber_cosine.R
+++ b/p2/quest/neurobiber_cosine.R
@@ -43,7 +43,59 @@ cos_sim_matrix <- coop::cosine(t(feature_mat))
 rownames(cos_sim_matrix) <- apply(register_means[, 1:3], 1, paste, collapse = "_")
 colnames(cos_sim_matrix) <- rownames(cos_sim_matrix)
 
+#finding the most dissimilar pairs 
 
+#' Report the features that differ most between two labelled feature vectors.
+#'
+#' @param pair1,pair2 A row/column name of `cos_sim_matrix`, or a row index.
+#' @param cos_sim_matrix Matrix whose dimnames label the rows of `feature_mat`.
+#' @param feature_mat Row-indexable features; columns assumed named "V<n>".
+#' @param normalized_cols Names indexed by the number in each "V<n>" name.
+#' @param top_n Maximum number of features to report.
+#' @return Invisibly, a data frame of the top features (also printed).
+compare_feature_vectors <- function(pair1, pair2, cos_sim_matrix, feature_mat,
+                                    normalized_cols, top_n = 5) {
+  # Resolve a label or index to one position; unknown labels fail loudly.
+  resolve_idx <- function(key, labels) {
+    idx <- if (is.character(key)) match(key, labels) else key
+    if (length(idx) != 1L || is.na(idx)) {
+      stop("No unique match for: ", paste(key, collapse = ", "), call. = FALSE)
+    }
+    idx
+  }
+  row_idx <- resolve_idx(pair1, rownames(cos_sim_matrix))
+  col_idx <- resolve_idx(pair2, colnames(cos_sim_matrix))
+  vec1 <- feature_mat[row_idx, ]
+  vec2 <- feature_mat[col_idx, ]
+
+  # Rank by absolute difference; head() keeps top_n from running past the end.
+  feature_diff <- abs(vec1 - vec2)
+  top_features_idx <- head(order(feature_diff, decreasing = TRUE), top_n)
+  top_features <- names(feature_diff)[top_features_idx]
+  # Strip only a leading "V" so the remainder is the index into normalized_cols.
+  feature_nums <- as.integer(sub("^V", "", top_features))
+  # Label each feature with the side holding the larger value (ties -> pair2).
+  larger_in <- ifelse(vec1[top_features_idx] > vec2[top_features_idx],
+                      rownames(cos_sim_matrix)[row_idx],
+                      colnames(cos_sim_matrix)[col_idx])
+  top_features_df <- data.frame(
+    feature = top_features,
+    normalized_colname = normalized_cols[feature_nums],
+    vec1_value = vec1[top_features_idx],
+    vec2_value = vec2[top_features_idx],
+    abs_difference = feature_diff[top_features_idx],
+    larger_in = larger_in
+  )
+  cat("Comparing:", rownames(cos_sim_matrix)[row_idx], "and", colnames(cos_sim_matrix)[col_idx], "\n")
+  print(top_features_df)
+  invisible(top_features_df)
+}
+
+compare_feature_vectors("resolved_c1_True", "resolved_c2_True", cos_sim_matrix, feature_mat, normalized_cols, top_n = 10)
+
+
+
+#plotting stuff beneath here
 annotation_row <- data.frame(
   affiliation = register_means$affiliation,
   source = register_means$source
@@ -88,7 +140,7 @@ fossy_heatmap <- pheatmap(cos_sim_matrix,
            annotation_colors = my_annotation_colors,
            na_col = "white")         
 
-ggsave(filename = "073125_FOSSY_comm_heatmap.png", plot = fossy_heatmap, width = 9, height = 9, dpi = 800)
+#ggsave(filename = "073125_FOSSY_comm_heatmap.png", plot = fossy_heatmap, width = 9, height = 9, dpi = 800)
 
 #diag(cos_sim_matrix) <- NA
 #which(cos_sim_matrix == max(cos_sim_matrix, na.rm = TRUE), arr.ind = TRUE)  # Most similar