# File listing metadata (viewer residue): 96 lines, 2.9 KiB, R
library(tidyverse)
|
|
|
|
# ---- Load the labelled feature table and collect per-row feature vectors ----
# Location of the CSV that holds the labels and the normalized_* columns.
neurobiber_csv <- "~/p2/quest/072525_pp_biberplus_labels.csv"
neurobiber_df <- read.csv(neurobiber_csv, header = TRUE)

# Every column whose name starts with "normalized_" is treated as a feature.
normalized_cols <- grep("^normalized_", names(neurobiber_df), value = TRUE)
# Fail early: with no feature columns every later step would run on empty
# vectors and break far from the actual cause.
stopifnot(length(normalized_cols) > 0)

# One numeric vector per row, stored as a list column. `drop = FALSE` keeps
# the selection a data frame even when a single feature column matches, so
# asplit() always receives a two-dimensional object.
neurobiber_df$normalized_features_vec <- lapply(
  asplit(neurobiber_df[, normalized_cols, drop = FALSE], 1),
  as.numeric
)
|
|
library(dplyr)
|
|
# Keep only task descriptions, then collapse the raw status into three
# outcome buckets:
#   duplicate / declined / invalid -> "declined"
#   stalled / open / progress      -> "open"
#   resolved                       -> "resolved"
# Any other status value is carried over unchanged.
neurobiber_df <- neurobiber_df |>
  dplyr::filter(comment_type == "task_description")

neurobiber_df <- neurobiber_df |>
  dplyr::mutate(
    task_status = dplyr::case_when(
      status %in% c("duplicate", "declined", "invalid") ~ "declined",
      status %in% c("stalled", "open", "progress") ~ "open",
      status %in% "resolved" ~ "resolved",
      TRUE ~ status
    )
  )
|
|
|
|
# Stack the per-row feature vectors into one matrix: a row per remaining
# record, a column per normalized feature.
X <- do.call("rbind", neurobiber_df[["normalized_features_vec"]])

library(coop)
# cos_sim1 <- coop::cosine(t(X))

# Mean feature profile for every (outcome, source, affiliation) combination.
# The first three columns of the result are the grouping keys; the remaining
# columns are the per-feature means.
register_means <- aggregate(
  x = X,
  by = list(
    outcome = neurobiber_df[["task_status"]],
    source = neurobiber_df[["source"]],
    affiliation = neurobiber_df[["AuthorWMFAffil"]]
  ),
  FUN = mean
)
|
|
|
|
# Columns 1-3 of register_means are the grouping keys (outcome, source,
# affiliation); everything after them is a feature mean. `drop = FALSE` keeps
# the selection two-dimensional even with a single feature column.
feature_mat <- as.matrix(register_means[, -(1:3), drop = FALSE])

# coop::cosine() compares columns, so transpose to get group-by-group
# similarity.
cos_sim_matrix <- coop::cosine(t(feature_mat))

# Label each group as "<outcome>_<source>_<affiliation>". paste() is applied
# to the columns directly rather than via apply() on the data frame: apply()
# converts the frame with as.matrix(), which format()s non-character columns
# and would pad values (e.g. " TRUE") inside the labels.
rownames(cos_sim_matrix) <- paste(
  register_means$outcome,
  register_means$source,
  register_means$affiliation,
  sep = "_"
)
colnames(cos_sim_matrix) <- rownames(cos_sim_matrix)
|
|
|
|
|
|
# Annotation table for the heatmap: one row per group, carrying its source and
# a readable affiliation label ("WMF" / "non-WMF").
#
# The affiliation flag is compared as text against both spellings because
# read.csv() may parse a True/False column as logical; a logical TRUE never
# equals the string "True", which would leave every label NA. Values that
# match neither spelling stay NA.
annotation_row <- data.frame(
  affiliation = as.character(register_means$affiliation),
  source = register_means$source
) |>
  mutate(affil = case_when(
    affiliation %in% c("True", "TRUE") ~ "WMF",
    affiliation %in% c("False", "FALSE") ~ "non-WMF"
  )) |>
  select(-affiliation)
rownames(annotation_row) <- rownames(cos_sim_matrix)

# Rows and columns of the similarity matrix describe the same groups, so the
# column annotation is the same table keyed by the column labels.
annotation_col <- annotation_row
rownames(annotation_col) <- colnames(cos_sim_matrix)
|
|
|
|
|
|
# Colour keys for the heatmap annotation tracks: one named colour vector per
# annotation column (`affil` and `source`).
my_annotation_colors <- list(
  affil = c(
    `WMF` = "green",
    `non-WMF` = "purple"
  ),
  source = c(
    `c1` = "lightgrey",
    `c2` = "grey",
    `c3` = "black"
  )
)
|
|
|
|
# Blank out everything strictly below the diagonal so only the diagonal and
# the upper triangle remain.
cos_sim_matrix[lower.tri(cos_sim_matrix, diag = FALSE)] <- NA_real_

# Earlier plotting attempts, kept for reference:
# pheatmap(scaled_mat, symm = TRUE)
# heatmap(cos_sim_matrix, col=heat.colors(256), breaks=seq(-1, 1, length.out=257))
|
|
library(viridis)
|
|
library(pheatmap)
|
|
# Draw the masked similarity matrix in its existing row/column order (no
# clustering, no scaling), with the annotation tracks on both axes and NA
# cells shown in white.
fossy_heatmap <- pheatmap::pheatmap(
  mat = cos_sim_matrix,
  scale = "none",
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  annotation_row = annotation_row,
  annotation_col = annotation_col,
  annotation_colors = my_annotation_colors,
  na_col = "white"
)

# Write the figure to disk.
ggplot2::ggsave(
  filename = "073125_FOSSY_comm_heatmap.png",
  plot = fossy_heatmap,
  width = 9,
  height = 9,
  dpi = 800
)
|
|
|
|
#diag(cos_sim_matrix) <- NA
|
|
#which(cos_sim_matrix == max(cos_sim_matrix, na.rm = TRUE), arr.ind = TRUE) # Most similar
|
|
#which(cos_sim_matrix == min(cos_sim_matrix, na.rm = TRUE), arr.ind = TRUE) # Least similar
|