NOTE(review): the first file in this patch is an RStudio Server job log that
contains a plaintext session password. Drop it from the commit and rotate the
credential; it is reproduced unchanged below only to keep the patch intact.

diff --git a/mgaughan-rstudio-server_27815770.out b/mgaughan-rstudio-server_27815770.out
new file mode 100644
index 0000000..1106020
--- /dev/null
+++ b/mgaughan-rstudio-server_27815770.out
@@ -0,0 +1,17 @@
+1. SSH tunnel from your workstation using the following command:
+
+   ssh -N -L 8787:n3439:41317 mjilg@klone.hyak.uw.edu
+
+   and point your web browser to http://localhost:8787
+
+2. log in to RStudio Server using the following credentials:
+
+   user: mjilg
+   password: yo0riOVPbQWPzplKhedd
+
+When done using RStudio Server, terminate the job by:
+
+1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window)
+2. Issue the following command on the login node:
+
+   scancel -f 27815770
diff --git a/p2/authorship_breakdown_cosine_similarity.png b/p2/authorship_breakdown_cosine_similarity.png
new file mode 100644
index 0000000..bd11371
Binary files /dev/null and b/p2/authorship_breakdown_cosine_similarity.png differ
diff --git a/p2/outcome_similarity_vector.png b/p2/outcome_similarity_vector.png
new file mode 100644
index 0000000..d7e9e46
Binary files /dev/null and b/p2/outcome_similarity_vector.png differ
diff --git a/p2/quest/neurobiber_cosine.R b/p2/quest/neurobiber_cosine.R
new file mode 100644
index 0000000..0523291
--- /dev/null
+++ b/p2/quest/neurobiber_cosine.R
@@ -0,0 +1,123 @@
+# Cosine similarity between the mean register-feature profiles of task
+# descriptions, grouped by author affiliation and task outcome.
+#
+# Steps: read the labelled CSV, average every `normalized_*` column within
+# each (AuthorWMFAffil, status) group, compute pairwise cosine similarity
+# between the group means, draw two heatmaps, and print the most and least
+# similar group pairs.
+
+library(tidyverse)
+library(dplyr)
+library(coop)
+library(pheatmap)
+library(reshape2)
+library(ggplot2)
+
+# Load and validate ----
+
+neurobiber_csv <- "~/p2/quest/072525_pp_biberplus_labels.csv"
+neurobiber_df <- read.csv(neurobiber_csv, header = TRUE)
+
+required_cols <- c("comment_type", "AuthorWMFAffil", "status")
+missing_cols <- setdiff(required_cols, names(neurobiber_df))
+if (length(missing_cols) > 0) {
+  stop(
+    "Missing required column(s): ", paste(missing_cols, collapse = ", "),
+    call. = FALSE
+  )
+}
+
+normalized_cols <- grep("^normalized_", names(neurobiber_df), value = TRUE)
+if (length(normalized_cols) == 0) {
+  stop("No `normalized_*` columns found in ", neurobiber_csv, call. = FALSE)
+}
+
+# One numeric feature vector per row, kept as a list column.
+# drop = FALSE keeps a data frame even when only one column matches.
+neurobiber_df$normalized_features_vec <- lapply(
+  asplit(neurobiber_df[, normalized_cols, drop = FALSE], 1),
+  as.numeric
+)
+
+neurobiber_df <- neurobiber_df |>
+  filter(comment_type == "task_description")
+if (nrow(neurobiber_df) == 0) {
+  stop("No rows with comment_type == \"task_description\".", call. = FALSE)
+}
+
+# Group means ----
+
+# Rows are comments, columns are features; restore the feature names that
+# as.numeric() dropped so the aggregated columns stay identifiable.
+X <- do.call(rbind, neurobiber_df$normalized_features_vec)
+colnames(X) <- normalized_cols
+
+#cos_sim1 <- coop::cosine(t(X))
+
+register_means <- aggregate(
+  X,
+  by = list(
+    affiliation = neurobiber_df$AuthorWMFAffil,
+    outcome = neurobiber_df$status
+  ),
+  FUN = mean
+)
+if (nrow(register_means) < 2) {
+  stop("Need at least two groups to compare.", call. = FALSE)
+}
+
+# Cosine similarity between groups ----
+
+feature_mat <- as.matrix(register_means[, -(1:2), drop = FALSE])
+# coop::cosine() compares columns, so transpose to put one group per column.
+cos_sim_matrix <- coop::cosine(t(feature_mat))
+# paste() on the columns directly avoids the padding that apply() can add
+# when it coerces a data frame to a character matrix.
+group_labels <- paste(
+  register_means$affiliation, register_means$outcome,
+  sep = "_"
+)
+dimnames(cos_sim_matrix) <- list(group_labels, group_labels)
+
+# Column-standardized copy; only the commented-out heatmap below uses it.
+scaled_mat <- scale(cos_sim_matrix)
+#pheatmap(scaled_mat, symm = TRUE)
+#heatmap(cos_sim_matrix, col=heat.colors(256), breaks=seq(-1, 1, length.out=257))
+
+# Heatmaps ----
+
+# Keep the groups in their original order (no clustering) and plot the raw
+# similarities (no row/column scaling).
+pheatmap(
+  cos_sim_matrix,
+  cluster_rows = FALSE,
+  cluster_cols = FALSE,
+  scale = "none"
+)
+
+sim_df <- melt(cos_sim_matrix, na.rm = TRUE)
+sim_plot <- ggplot(sim_df, aes(Var1, Var2, fill = value)) +
+  geom_tile() +
+  scale_fill_gradient2(
+    low = "white", mid = "blue", high = "red",
+    midpoint = 0.5, limits = c(0, 1)
+  ) +
+  theme(axis.text.x = element_text(angle = 90, hjust = 1))
+# Explicit print() so the plot is also drawn when the script is source()d.
+print(sim_plot)
+
+# Extremes ----
+
+# Blank out the self-similarities so they cannot win the maximum. The matrix
+# is symmetric, so each pair is reported twice (once per triangle).
+diag(cos_sim_matrix) <- NA
+# Most similar
+print(which(
+  cos_sim_matrix == max(cos_sim_matrix, na.rm = TRUE),
+  arr.ind = TRUE
+))
+# Least similar
+print(which(
+  cos_sim_matrix == min(cos_sim_matrix, na.rm = TRUE),
+  arr.ind = TRUE
+))