1
0

updating with new heatmap for FOSSY presentation

This commit is contained in:
Matthew Gaughan 2025-07-29 14:25:19 -07:00
parent c5966518ef
commit b624109f8d
3 changed files with 26 additions and 19 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 65 KiB

View File

@ -1,17 +1,17 @@
1. SSH tunnel from your workstation using the following command: 1. SSH tunnel from your workstation using the following command:
ssh -N -L 8787:n3439:41317 mjilg@klone.hyak.uw.edu ssh -N -L 8787:n3441:48367 mjilg@klone.hyak.uw.edu
and point your web browser to http://localhost:8787 and point your web browser to http://localhost:8787
2. log in to RStudio Server using the following credentials: 2. log in to RStudio Server using the following credentials:
user: mjilg user: mjilg
password: yo0riOVPbQWPzplKhedd password: WYkG3aRTe0NQjsw3Ayg6
When done using RStudio Server, terminate the job by: When done using RStudio Server, terminate the job by:
1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window) 1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window)
2. Issue the following command on the login node: 2. Issue the following command on the login node:
scancel -f 27815770 scancel -f 27817681

View File

@ -9,8 +9,18 @@ neurobiber_df$normalized_features_vec <- lapply(
asplit(neurobiber_df[, normalized_cols], 1), as.numeric asplit(neurobiber_df[, normalized_cols], 1), as.numeric
) )
library(dplyr) library(dplyr)
# duplicate, declined, invalid -> declined
# stalled, open, progress -> open
# resolved -> resolved
neurobiber_df <- neurobiber_df |> neurobiber_df <- neurobiber_df |>
filter(comment_type == "task_description") filter(comment_type == "task_description") |>
mutate(
task_status = case_when(
status %in% c("duplicate", "declined", "invalid") ~ "declined",
status %in% c("stalled", "open", "progress") ~ "open",
status == "resolved" ~ "resolved",
TRUE ~ status # fallback for unexpected values
))
X <- do.call(rbind, neurobiber_df$normalized_features_vec) X <- do.call(rbind, neurobiber_df$normalized_features_vec)
@ -20,34 +30,31 @@ library(coop)
register_means <- aggregate( register_means <- aggregate(
X, X,
by = list( by = list(
affiliation = neurobiber_df$AuthorWMFAffil, priority = neurobiber_df$priority,
outcome= neurobiber_df$status outcome= neurobiber_df$task_status,
phase = neurobiber_df$phase,
source = neurobiber_df$source,
affiliation = neurobiber_df$AuthorWMFAffil
), ),
FUN = mean FUN = mean
) )
feature_mat <- as.matrix(register_means[, -(1:2)]) feature_mat <- as.matrix(register_means[, -(1:5)])
cos_sim_matrix <- coop::cosine(t(feature_mat)) cos_sim_matrix <- coop::cosine(t(feature_mat))
rownames(cos_sim_matrix) <- apply(register_means[, 1:2], 1, paste, collapse = "_") rownames(cos_sim_matrix) <- apply(register_means[, 1:5], 1, paste, collapse = "_")
colnames(cos_sim_matrix) <- rownames(cos_sim_matrix) colnames(cos_sim_matrix) <- rownames(cos_sim_matrix)
scaled_mat <- scale(cos_sim_matrix) scaled_mat <- scale(cos_sim_matrix)
#pheatmap(scaled_mat, symm = TRUE) #pheatmap(scaled_mat, symm = TRUE)
#heatmap(cos_sim_matrix, col=heat.colors(256), breaks=seq(-1, 1, length.out=257)) #heatmap(cos_sim_matrix, col=heat.colors(256), breaks=seq(-1, 1, length.out=257))
library(viridis)
library(pheatmap) library(pheatmap)
pheatmap(cos_sim_matrix, pheatmap(cos_sim_matrix,
register_rows = FALSE, # Now features are clustered (rows) cluster_rows = FALSE, # rows are not clustered; original row order is kept
register_cols = FALSE, cluster_cols = FALSE,
scale='none') # Standardize featu scale='none',
color = viridis(100)) # scale = 'none': values are plotted without standardization, using the viridis palette
library(reshape2)
library(ggplot2)
sim_df <- melt(cos_sim_matrix, na.rm = TRUE)
ggplot(sim_df, aes(Var1, Var2, fill = value)) +
geom_tile() +
scale_fill_gradient2(low = "white", high = "red", mid = "blue", midpoint = 0.5, limit = c(0,1)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
diag(cos_sim_matrix) <- NA diag(cos_sim_matrix) <- NA
which(cos_sim_matrix == max(cos_sim_matrix, na.rm = TRUE), arr.ind = TRUE) # Most similar which(cos_sim_matrix == max(cos_sim_matrix, na.rm = TRUE), arr.ind = TRUE) # Most similar