# Cosine-similarity heatmap of mean "normalized_*" feature vectors.
#
# Reads the labelled CSV, keeps task-description rows, collapses the task
# status into three outcomes, averages the normalized feature columns per
# (outcome, source, affiliation) group, computes pairwise cosine similarity
# between the group means and saves the upper triangle as an annotated heatmap.

library(tidyverse)
library(dplyr)
library(coop)
library(viridis)
library(pheatmap)

# Input ----

neurobiber_csv <- "~/p2/quest/072525_pp_biberplus_labels.csv"
neurobiber_df <- read.csv(neurobiber_csv, header = TRUE)

normalized_cols <- grep("^normalized_", names(neurobiber_df), value = TRUE)
if (length(normalized_cols) == 0) {
  stop("No columns matching '^normalized_' found in ", neurobiber_csv,
       call. = FALSE)
}

# One numeric feature vector per row, stored as a list column.
# drop = FALSE keeps a data frame even when only one column matches, so the
# row-wise asplit() still sees two dimensions.
neurobiber_df$normalized_features_vec <- lapply(
  asplit(neurobiber_df[, normalized_cols, drop = FALSE], 1),
  as.numeric
)

# Outcome recoding ----

# duplicate, declined, invalid -> declined
# stalled, open, progress -> open
# resolved -> resolved
neurobiber_df <- neurobiber_df |>
  filter(comment_type == "task_description") |>
  mutate(
    task_status = case_when(
      status %in% c("duplicate", "declined", "invalid") ~ "declined",
      status %in% c("stalled", "open", "progress") ~ "open",
      status == "resolved" ~ "resolved",
      TRUE ~ status # fallback for unexpected values
    )
  )

if (nrow(neurobiber_df) == 0) {
  stop("No rows with comment_type == \"task_description\" in ",
       neurobiber_csv, call. = FALSE)
}

# Group means and cosine similarity ----

# Stack the per-row feature vectors into a rows x features matrix.
X <- do.call(rbind, neurobiber_df$normalized_features_vec)

#cos_sim1 <- coop::cosine(t(X))

# Mean feature vector for every (outcome, source, affiliation) combination.
register_means <- aggregate(
  X,
  by = list(
    outcome = neurobiber_df$task_status,
    source = neurobiber_df$source,
    affiliation = neurobiber_df$AuthorWMFAffil
  ),
  FUN = mean
)

# The first three columns are the grouping keys; the rest are feature means.
feature_mat <- as.matrix(register_means[, -(1:3)])

# The matrix is transposed so that each group becomes a column before the
# pairwise cosine is taken, giving a groups x groups result.
cos_sim_matrix <- coop::cosine(t(feature_mat))

# Label each group as "<outcome>_<source>_<affiliation>". A vectorised paste()
# is used instead of apply() on the data frame, which would go through
# as.matrix() and may pad non-character key columns with spaces.
group_labels <- paste(
  register_means$outcome,
  register_means$source,
  register_means$affiliation,
  sep = "_"
)
rownames(cos_sim_matrix) <- group_labels
colnames(cos_sim_matrix) <- rownames(cos_sim_matrix)

# Heatmap annotations ----

# read.csv() may parse a True/False column as logical rather than character,
# so the flag is normalised to upper-case text before it is compared.
# Values other than true/false are left as NA.
affiliation_flag <- toupper(trimws(as.character(register_means$affiliation)))

annotation_row <- data.frame(
  source = register_means$source,
  affil = case_when(
    affiliation_flag == "TRUE" ~ "WMF",
    affiliation_flag == "FALSE" ~ "non-WMF"
  )
)
rownames(annotation_row) <- rownames(cos_sim_matrix)

# Rows and columns of the similarity matrix are the same groups in the same
# order, so the column annotation is a copy of the row annotation.
annotation_col <- annotation_row
rownames(annotation_col) <- colnames(cos_sim_matrix)

my_annotation_colors <- list(
  affil = c("WMF" = "green", "non-WMF" = "purple"),
  source = c(c1 = "lightgrey", c2 = "grey", c3 = "black")
)

# Plot ----

# Blank the lower triangle so each pair of groups is drawn only once;
# the NA cells are rendered with na_col below.
cos_sim_matrix[lower.tri(cos_sim_matrix)] <- NA

#pheatmap(scaled_mat, symm = TRUE)
#heatmap(cos_sim_matrix, col=heat.colors(256), breaks=seq(-1, 1, length.out=257))

# Clustering is switched off so the rows and columns stay in group order.
fossy_heatmap <- pheatmap(
  cos_sim_matrix,
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  scale = "none",
  annotation_row = annotation_row,
  annotation_col = annotation_col,
  annotation_colors = my_annotation_colors,
  na_col = "white"
)

# NOTE(review): ggsave() is handed a pheatmap object rather than a ggplot;
# confirm the saved PNG renders as expected.
ggsave(
  filename = "073125_FOSSY_comm_heatmap.png",
  plot = fossy_heatmap,
  width = 9,
  height = 9,
  dpi = 800
)

#diag(cos_sim_matrix) <- NA
#which(cos_sim_matrix == max(cos_sim_matrix, na.rm = TRUE), arr.ind = TRUE) # Most similar
#which(cos_sim_matrix == min(cos_sim_matrix, na.rm = TRUE), arr.ind = TRUE) # Least similar