updating with new heatmap for FOSSY presentation
This commit is contained in:
		
							parent
							
								
									c5966518ef
								
							
						
					
					
						commit
						b624109f8d
					
				
							
								
								
									
										
											BIN
										
									
								
								affiliation_heatmap_fossy_plot.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								affiliation_heatmap_fossy_plot.png
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| After Width: | Height: | Size: 65 KiB | 
| @ -1,17 +1,17 @@ | |||||||
| 1. SSH tunnel from your workstation using the following command: | 1. SSH tunnel from your workstation using the following command: | ||||||
| 
 | 
 | ||||||
|    ssh -N -L 8787:n3439:41317 mjilg@klone.hyak.uw.edu |    ssh -N -L 8787:n3441:48367 mjilg@klone.hyak.uw.edu | ||||||
| 
 | 
 | ||||||
|    and point your web browser to http://localhost:8787 |    and point your web browser to http://localhost:8787 | ||||||
| 
 | 
 | ||||||
| 2. log in to RStudio Server using the following credentials: | 2. log in to RStudio Server using the following credentials: | ||||||
| 
 | 
 | ||||||
|    user: mjilg |    user: mjilg | ||||||
|    password: yo0riOVPbQWPzplKhedd |    password: WYkG3aRTe0NQjsw3Ayg6 | ||||||
| 
 | 
 | ||||||
| When done using RStudio Server, terminate the job by: | When done using RStudio Server, terminate the job by: | ||||||
| 
 | 
 | ||||||
| 1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window) | 1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window) | ||||||
| 2. Issue the following command on the login node: | 2. Issue the following command on the login node: | ||||||
| 
 | 
 | ||||||
|       scancel -f 27815770 |       scancel -f 27817681 | ||||||
| @ -9,8 +9,18 @@ neurobiber_df$normalized_features_vec <- lapply( | |||||||
|   asplit(neurobiber_df[, normalized_cols], 1), as.numeric |   asplit(neurobiber_df[, normalized_cols], 1), as.numeric | ||||||
| ) | ) | ||||||
| library(dplyr) | library(dplyr) | ||||||
|  | # duplicate, declined, invalid -> declined  | ||||||
|  | # stalled, open, progress -> open | ||||||
|  | # resolved -> resolved | ||||||
| neurobiber_df <- neurobiber_df |> | neurobiber_df <- neurobiber_df |> | ||||||
|   filter(comment_type == "task_description") |   filter(comment_type == "task_description") |> | ||||||
|  |   mutate( | ||||||
|  |     task_status = case_when( | ||||||
|  |       status %in% c("duplicate", "declined", "invalid") ~ "declined", | ||||||
|  |       status %in% c("stalled", "open", "progress") ~ "open", | ||||||
|  |       status == "resolved" ~ "resolved", | ||||||
|  |       TRUE ~ status  # fallback for unexpected values | ||||||
|  |     )) | ||||||
| 
 | 
 | ||||||
| X <- do.call(rbind, neurobiber_df$normalized_features_vec) | X <- do.call(rbind, neurobiber_df$normalized_features_vec) | ||||||
| 
 | 
 | ||||||
| @ -20,34 +30,31 @@ library(coop) | |||||||
| register_means <- aggregate( | register_means <- aggregate( | ||||||
|   X, |   X, | ||||||
|   by = list( |   by = list( | ||||||
|     affiliation = neurobiber_df$AuthorWMFAffil, |     priority = neurobiber_df$priority, | ||||||
|     outcome= neurobiber_df$status |     outcome= neurobiber_df$task_status, | ||||||
|  |     phase = neurobiber_df$phase, | ||||||
|  |     source = neurobiber_df$source, | ||||||
|  |     affiliation = neurobiber_df$AuthorWMFAffil | ||||||
|   ), |   ), | ||||||
|   FUN = mean |   FUN = mean | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| feature_mat <- as.matrix(register_means[, -(1:2)])   | feature_mat <- as.matrix(register_means[, -(1:5)])   | ||||||
| cos_sim_matrix <- coop::cosine(t(feature_mat)) | cos_sim_matrix <- coop::cosine(t(feature_mat)) | ||||||
| rownames(cos_sim_matrix) <- apply(register_means[, 1:2], 1, paste, collapse = "_") | rownames(cos_sim_matrix) <- apply(register_means[, 1:5], 1, paste, collapse = "_") | ||||||
| colnames(cos_sim_matrix) <- rownames(cos_sim_matrix) | colnames(cos_sim_matrix) <- rownames(cos_sim_matrix) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| scaled_mat <- scale(cos_sim_matrix) | scaled_mat <- scale(cos_sim_matrix) | ||||||
| #pheatmap(scaled_mat, symm = TRUE) | #pheatmap(scaled_mat, symm = TRUE) | ||||||
| #heatmap(cos_sim_matrix, col=heat.colors(256), breaks=seq(-1, 1, length.out=257)) | #heatmap(cos_sim_matrix, col=heat.colors(256), breaks=seq(-1, 1, length.out=257)) | ||||||
|  | library(viridis) | ||||||
| library(pheatmap) | library(pheatmap) | ||||||
| pheatmap(cos_sim_matrix,  | pheatmap(cos_sim_matrix,  | ||||||
|          register_rows = FALSE,    # Now features are clustered (rows) |          cluster_rows = FALSE,    # row clustering disabled: rows keep their input order | ||||||
|          register_cols = FALSE, |          cluster_cols = FALSE, | ||||||
|          scale='none')         # Standardize featu |          scale='none', | ||||||
| 
 |          color = viridis(100))         # 100-step viridis palette; scale='none' leaves values unstandardized | ||||||
| library(reshape2) |  | ||||||
| library(ggplot2) |  | ||||||
| sim_df <- melt(cos_sim_matrix, na.rm = TRUE) |  | ||||||
| ggplot(sim_df, aes(Var1, Var2, fill = value)) + |  | ||||||
|   geom_tile() + |  | ||||||
|   scale_fill_gradient2(low = "white", high = "red", mid = "blue", midpoint = 0.5, limit = c(0,1)) + |  | ||||||
|   theme(axis.text.x = element_text(angle = 90, hjust = 1)) |  | ||||||
| 
 | 
 | ||||||
| diag(cos_sim_matrix) <- NA | diag(cos_sim_matrix) <- NA | ||||||
| which(cos_sim_matrix == max(cos_sim_matrix, na.rm = TRUE), arr.ind = TRUE)  # Most similar | which(cos_sim_matrix == max(cos_sim_matrix, na.rm = TRUE), arr.ind = TRUE)  # Most similar | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user