diff --git a/affiliation_heatmap_fossy_plot.png b/affiliation_heatmap_fossy_plot.png new file mode 100644 index 0000000..d9428f1 Binary files /dev/null and b/affiliation_heatmap_fossy_plot.png differ diff --git a/mgaughan-rstudio-server_27815770.out b/mgaughan-rstudio-server_27817681.out similarity index 77% rename from mgaughan-rstudio-server_27815770.out rename to mgaughan-rstudio-server_27817681.out index 1106020..985ba7a 100644 --- a/mgaughan-rstudio-server_27815770.out +++ b/mgaughan-rstudio-server_27817681.out @@ -1,17 +1,17 @@ 1. SSH tunnel from your workstation using the following command: - ssh -N -L 8787:n3439:41317 mjilg@klone.hyak.uw.edu + ssh -N -L 8787:n3441:48367 mjilg@klone.hyak.uw.edu and point your web browser to http://localhost:8787 2. log in to RStudio Server using the following credentials: user: mjilg - password: yo0riOVPbQWPzplKhedd + password: WYkG3aRTe0NQjsw3Ayg6 When done using RStudio Server, terminate the job by: 1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window) 2. Issue the following command on the login node: - scancel -f 27815770 + scancel -f 27817681 diff --git a/p2/quest/neurobiber_cosine.R b/p2/quest/neurobiber_cosine.R index 0523291..2e94cff 100644 --- a/p2/quest/neurobiber_cosine.R +++ b/p2/quest/neurobiber_cosine.R @@ -9,8 +9,18 @@ neurobiber_df$normalized_features_vec <- lapply( asplit(neurobiber_df[, normalized_cols], 1), as.numeric ) library(dplyr) +# duplicate, declined, invalid -> declined +# stalled, open, progress -> open +# resolved -> resolved neurobiber_df <- neurobiber_df |> - filter(comment_type == "task_description") + filter(comment_type == "task_description") |> + mutate( + task_status = case_when( + status %in% c("duplicate", "declined", "invalid") ~ "declined", + status %in% c("stalled", "open", "progress") ~ "open", + status == "resolved" ~ "resolved", + TRUE ~ status # fallback for unexpected values + )) X <- do.call(rbind, neurobiber_df$normalized_features_vec) @@ -20,34 +30,31 @@ library(coop) register_means <- aggregate( X, by = list( - affiliation = neurobiber_df$AuthorWMFAffil, - outcome= neurobiber_df$status + priority = neurobiber_df$priority, + outcome= neurobiber_df$task_status, + phase = neurobiber_df$phase, + source = neurobiber_df$source, + affiliation = neurobiber_df$AuthorWMFAffil ), FUN = mean ) -feature_mat <- as.matrix(register_means[, -(1:2)]) +feature_mat <- as.matrix(register_means[, -(1:5)]) cos_sim_matrix <- coop::cosine(t(feature_mat)) -rownames(cos_sim_matrix) <- apply(register_means[, 1:2], 1, paste, collapse = "_") +rownames(cos_sim_matrix) <- apply(register_means[, 1:5], 1, paste, collapse = "_") colnames(cos_sim_matrix) <- rownames(cos_sim_matrix) scaled_mat <- scale(cos_sim_matrix) #pheatmap(scaled_mat, symm = TRUE) #heatmap(cos_sim_matrix, col=heat.colors(256), breaks=seq(-1, 1, length.out=257)) +library(viridis) library(pheatmap) pheatmap(cos_sim_matrix, - register_rows = FALSE, # Now features are clustered (rows) - register_cols = FALSE, - scale='none') # Standardize featu - -library(reshape2) -library(ggplot2) -sim_df <- melt(cos_sim_matrix, na.rm = TRUE) -ggplot(sim_df, aes(Var1, Var2, fill = value)) + - geom_tile() + - scale_fill_gradient2(low = "white", high = "red", mid = "blue", midpoint = 0.5, limit = c(0,1)) + - theme(axis.text.x = element_text(angle = 90, hjust = 1)) + cluster_rows = FALSE, # Now features are clustered (rows) + cluster_cols = FALSE, + scale='none', + color = viridis(100)) # Standardize featu diag(cos_sim_matrix) <- NA which(cos_sim_matrix == max(cos_sim_matrix, na.rm = TRUE), arr.ind = TRUE) # Most similar