# Cosine similarity between the mean normalized-feature profiles of
# task-description comments, grouped by author affiliation and task status.
# Draws two heatmaps of the similarity matrix and prints the most and least
# similar pairs of groups.

# Packages ----
library(tidyverse)
library(dplyr)
library(coop)
library(pheatmap)
library(reshape2)
library(ggplot2)

# Load data ----
neurobiber_csv <- "~/p2/quest/072525_pp_biberplus_labels.csv"
neurobiber_df <- read.csv(neurobiber_csv, header = TRUE)

# Every column whose name starts with "normalized_" is used as a feature.
normalized_cols <- grep("^normalized_", names(neurobiber_df), value = TRUE)
if (length(normalized_cols) == 0L) {
  stop("No 'normalized_' columns found in ", neurobiber_csv, call. = FALSE)
}

# Keep only task-description comments.
neurobiber_df <- neurobiber_df |>
  filter(comment_type == "task_description")
if (nrow(neurobiber_df) == 0L) {
  stop("No rows with comment_type == 'task_description'", call. = FALSE)
}

# Feature matrix ----
# One numeric vector per row, stored as a list column; `drop = FALSE` keeps
# the selection two-dimensional even when only one feature column matched.
neurobiber_df$normalized_features_vec <- lapply(
  asplit(neurobiber_df[, normalized_cols, drop = FALSE], 1),
  as.numeric
)

# Stack the per-row vectors into a rows-by-features matrix.
X <- do.call(rbind, neurobiber_df$normalized_features_vec)

# Row-by-row similarity, kept for reference:
# cos_sim1 <- coop::cosine(t(X))

# Group means ----
# Mean of every feature within each affiliation x outcome combination.
register_means <- aggregate(
  X,
  by = list(
    affiliation = neurobiber_df$AuthorWMFAffil,
    outcome = neurobiber_df$status
  ),
  FUN = mean
)

# Drop the two grouping columns; `drop = FALSE` guards the one-feature case.
feature_mat <- as.matrix(register_means[, -(1:2), drop = FALSE])

# Cosine similarity ----
# coop::cosine() compares columns, so transpose to put one group per column.
cos_sim_matrix <- coop::cosine(t(feature_mat))

# Label each group "<affiliation>_<outcome>". paste() is vectorized and,
# unlike apply() over a data frame, does not pad non-character values with
# spaces when converting them to text.
group_labels <- paste(
  register_means$affiliation,
  register_means$outcome,
  sep = "_"
)
rownames(cos_sim_matrix) <- group_labels
colnames(cos_sim_matrix) <- group_labels

# Column-standardized copy; only used by the commented-out plot below.
scaled_mat <- scale(cos_sim_matrix)
# pheatmap(scaled_mat, symm = TRUE)
# heatmap(cos_sim_matrix, col = heat.colors(256),
#         breaks = seq(-1, 1, length.out = 257))

# Heatmaps ----
# Plot the raw similarities in aggregate() order: no row/column clustering
# and no rescaling. `cluster_rows` / `cluster_cols` are the documented
# pheatmap argument names.
pheatmap(
  cos_sim_matrix,
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  scale = "none"
)

# Long format (Var1, Var2, value) for ggplot2.
sim_df <- melt(cos_sim_matrix, na.rm = TRUE)

sim_plot <- ggplot(sim_df, aes(Var1, Var2, fill = value)) +
  geom_tile() +
  scale_fill_gradient2(
    low = "white",
    high = "red",
    mid = "blue",
    midpoint = 0.5,
    limits = c(0, 1)
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Print explicitly so the plot is also drawn when the script is source()d.
print(sim_plot)

# Extremes ----
# Blank the diagonal so a group is never reported as most similar to itself.
diag(cos_sim_matrix) <- NA

# The matrix is symmetric, so each pair is listed as both (i, j) and (j, i).
most_similar <- which(
  cos_sim_matrix == max(cos_sim_matrix, na.rm = TRUE),
  arr.ind = TRUE
)
least_similar <- which(
  cos_sim_matrix == min(cos_sim_matrix, na.rm = TRUE),
  arr.ind = TRUE
)
print(most_similar)  # Most similar
print(least_similar) # Least similar