updating similarity vectors
This commit is contained in:
parent
23ef7acd01
commit
c5966518ef
17
mgaughan-rstudio-server_27815770.out
Normal file
17
mgaughan-rstudio-server_27815770.out
Normal file
@ -0,0 +1,17 @@
|
||||
1. SSH tunnel from your workstation using the following command:
|
||||
|
||||
ssh -N -L 8787:n3439:41317 mjilg@klone.hyak.uw.edu
|
||||
|
||||
and point your web browser to http://localhost:8787
|
||||
|
||||
2. log in to RStudio Server using the following credentials:
|
||||
|
||||
user: mjilg
|
||||
password: yo0riOVPbQWPzplKhedd
|
||||
|
||||
When done using RStudio Server, terminate the job by:
|
||||
|
||||
1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window)
|
||||
2. Issue the following command on the login node:
|
||||
|
||||
scancel -f 27815770
|
BIN
p2/authorship_breakdown_cosine_similarity.png
Normal file
BIN
p2/authorship_breakdown_cosine_similarity.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 23 KiB |
BIN
p2/outcome_similarity_vector.png
Normal file
BIN
p2/outcome_similarity_vector.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 56 KiB |
54
p2/quest/neurobiber_cosine.R
Normal file
54
p2/quest/neurobiber_cosine.R
Normal file
@ -0,0 +1,54 @@
|
||||
library(tidyverse)
|
||||
|
||||
# Load the labelled comments and build the numeric feature matrix `X`:
# one row per "task_description" comment, one column per `normalized_*`
# column of the CSV.
neurobiber_csv <- "~/p2/quest/072525_pp_biberplus_labels.csv"
neurobiber_df <- read.csv(neurobiber_csv, header = TRUE)

# Feature columns are identified purely by their "normalized_" name prefix.
normalized_cols <- grep("^normalized_", names(neurobiber_df), value = TRUE)
if (length(normalized_cols) == 0L) {
  stop("No 'normalized_' columns found in ", neurobiber_csv, call. = FALSE)
}

# Store each row's feature values as a numeric vector in a list column.
# drop = FALSE keeps a data frame even when only one column matches.
neurobiber_df$normalized_features_vec <- lapply(
  asplit(neurobiber_df[, normalized_cols, drop = FALSE], 1),
  as.numeric
)

library(dplyr)
# Restrict the analysis to task-description comments.
neurobiber_df <- neurobiber_df |>
  filter(comment_type == "task_description")
if (nrow(neurobiber_df) == 0L) {
  # Without this check `X` below would be NULL and aggregate() would fail
  # later with an unrelated-looking error.
  stop("No rows with comment_type == \"task_description\"", call. = FALSE)
}

# Stack the per-row vectors back into a matrix (rows = comments).
X <- do.call(rbind, neurobiber_df$normalized_features_vec)
|
||||
|
||||
library(coop)
#cos_sim1 <- coop::cosine(t(X))

# Mean feature vector for every (affiliation, outcome) combination present in
# the data. The first two columns of the result are the grouping keys, the
# remaining columns are the per-feature means.
register_means <- aggregate(
  X,
  by = list(
    affiliation = neurobiber_df$AuthorWMFAffil,
    outcome = neurobiber_df$status
  ),
  FUN = mean
)

# Drop the two key columns; drop = FALSE keeps the column structure even if
# only a single feature column remains.
feature_mat <- as.matrix(register_means[, -(1:2), drop = FALSE])

# The matrix is transposed so that the groups (rows of feature_mat) become
# the columns handed to coop::cosine(); the result is group-by-group.
cos_sim_matrix <- coop::cosine(t(feature_mat))

# Label rows and columns "<affiliation>_<outcome>". Vectorised paste() is used
# instead of apply() over the data frame: apply() coerces through as.matrix(),
# which format()s numeric keys and can pad them with spaces.
rownames(cos_sim_matrix) <- paste(
  register_means$affiliation,
  register_means$outcome,
  sep = "_"
)
colnames(cos_sim_matrix) <- rownames(cos_sim_matrix)
|
||||
|
||||
|
||||
# Column-standardised copy of the similarity matrix; currently only referenced
# by the commented-out pheatmap() call below.
scaled_mat <- scale(cos_sim_matrix)
#pheatmap(scaled_mat, symm = TRUE)
#heatmap(cos_sim_matrix, col=heat.colors(256), breaks=seq(-1, 1, length.out=257))
library(pheatmap)
# Draw the raw similarities in their existing group order: no hierarchical
# clustering of rows or columns and no rescaling of the values.
# The arguments must be `cluster_rows` / `cluster_cols`; the previous
# `register_rows` / `register_cols` are not pheatmap parameters, so the
# request to disable clustering was never applied.
pheatmap(
  cos_sim_matrix,
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  scale = "none"
)
|
||||
|
||||
library(reshape2)
library(ggplot2)
# Reshape the similarity matrix to long format (columns Var1, Var2, value as
# used by the aes() mapping below), dropping NA cells.
sim_df <- melt(cos_sim_matrix, na.rm = TRUE)

# Tile heatmap of the pairwise similarities.
# NOTE(review): the fill scale is limited to [0, 1]; if any feature means are
# negative the cosine similarity can be < 0 and would fall outside these
# limits -- confirm the inputs are non-negative.
ggplot(sim_df, aes(Var1, Var2, fill = value)) +
  geom_tile() +
  scale_fill_gradient2(
    low = "white", mid = "blue", high = "red",
    midpoint = 0.5,
    limits = c(0, 1) # spelled out; `limit` relied on partial matching
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
|
||||
|
||||
# Blank out the diagonal (each group compared with itself) so it cannot be
# selected as an extreme below. This modifies cos_sim_matrix in place.
diag(cos_sim_matrix) <- NA

# Largest and smallest remaining off-diagonal similarity values.
highest_sim <- max(cos_sim_matrix, na.rm = TRUE)
lowest_sim <- min(cos_sim_matrix, na.rm = TRUE)

# Row/column indices of the most similar pair(s) of groups.
which(cos_sim_matrix == highest_sim, arr.ind = TRUE)
# Row/column indices of the least similar pair(s) of groups.
which(cos_sim_matrix == lowest_sim, arr.ind = TRUE)
|
Loading…
Reference in New Issue
Block a user