adding in analysis of biberplus vectors
This commit is contained in:
		
							parent
							
								
									b0584ec1be
								
							
						
					
					
						commit
						a08a49d04e
					
				| @ -1,15 +1,16 @@ | |||||||
| library(tidyverse) | library(tidyverse) | ||||||
| 
 | 
 | ||||||
| neurobiber_csv <-"~/p2/quest/071525_neurobiber_labels.csv" | neurobiber_csv <-"~/p2/quest/072325_biberplus_labels.csv" | ||||||
| neurobiber_df <- read.csv(neurobiber_csv , header = TRUE)  | neurobiber_df <- read.csv(neurobiber_csv , header = TRUE)  | ||||||
| 
 | 
 | ||||||
| neurobiber_df$features_vec  <- lapply(neurobiber_df$neurobiber_preds, function(x) { | normalized_cols <- grep("^normalized_", names(neurobiber_df), value = TRUE) | ||||||
|   x <- gsub("\\[|\\]", "", x) | 
 | ||||||
|   x <- trimws(x) | neurobiber_df$normalized_features_vec <- lapply( | ||||||
|   as.numeric(unlist(strsplit(x, "\\s+"))) |   asplit(neurobiber_df[, normalized_cols], 1), as.numeric | ||||||
| }) | ) | ||||||
|  | 
 | ||||||
|  | X <- do.call(rbind, neurobiber_df$normalized_features_vec) | ||||||
| 
 | 
 | ||||||
| X <- do.call(rbind, neurobiber_df$features_vec ) |  | ||||||
| 
 | 
 | ||||||
| set.seed(808) | set.seed(808) | ||||||
| 
 | 
 | ||||||
| @ -37,7 +38,7 @@ neurobiber_df$PC1 <- pca$x[,1] | |||||||
| neurobiber_df$PC2 <- pca$x[,2] | neurobiber_df$PC2 <- pca$x[,2] | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| ggplot(neurobiber_df, aes(x = PC1, y = PC2, color = dbcluster)) + | ggplot(neurobiber_df, aes(x = PC1, y = PC2, color = phase)) + | ||||||
|   geom_point(size = 2, alpha = 0.7) + |   geom_point(size = 2, alpha = 0.7) + | ||||||
|   theme_minimal() + |   theme_minimal() + | ||||||
|   labs(title = "Across-case comment clusters (DBSCAN) by cross-case PCA", |   labs(title = "Across-case comment clusters (DBSCAN) by cross-case PCA", | ||||||
| @ -53,9 +54,27 @@ ggplot(neurobiber_df, aes(x = phase, y=dbcluster, fill=AuthorWMFAffil)) + | |||||||
|        y = "Neurobiber feature vector cluster (DBSCAN)") + |        y = "Neurobiber feature vector cluster (DBSCAN)") + | ||||||
|   facet_wrap(~ source) |   facet_wrap(~ source) | ||||||
| 
 | 
 | ||||||
| cluster_means <- aggregate(X, by = list(Cluster = neurobiber_df$dbcluster), FUN = mean) | cluster_means <- aggregate( | ||||||
| rownames(cluster_means) <- paste0("Cluster_", cluster_means$Cluster) |   X, | ||||||
| cluster_means <- cluster_means[,-1] # Remove cluster label column |   by = list( | ||||||
|  |     WMFAffil = neurobiber_df$AuthorWMFAffil, | ||||||
|  |     phase = neurobiber_df$phase, | ||||||
|  |     comment_type = neurobiber_df$comment_type, | ||||||
|  |     source= neurobiber_df$source | ||||||
|  |   ), | ||||||
|  |   FUN = mean | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | rownames(cluster_means) <- apply( | ||||||
|  |   cluster_means[, c("WMFAffil", "phase", "comment_type", "source")], 1, | ||||||
|  |   function(x) paste(x, collapse = "_") | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | cluster_means <- cluster_means[, !(names(cluster_means) %in% c("WMFAffil", "phase", "comment_type", "source"))] | ||||||
|  | 
 | ||||||
|  | #cluster_means <- aggregate(X, by = list(Cluster = neurobiber_df$AuthorWMFAffil), FUN = mean) | ||||||
|  | #rownames(cluster_means) <- paste0("Cluster_", cluster_means$Cluster) | ||||||
|  | #cluster_means <- cluster_means[,-1] # Remove cluster label column | ||||||
| 
 | 
 | ||||||
| BIBER_FEATURES <- c( | BIBER_FEATURES <- c( | ||||||
|   "BIN_QUAN","BIN_QUPR","BIN_AMP","BIN_PASS","BIN_XX0","BIN_JJ", |   "BIN_QUAN","BIN_QUPR","BIN_AMP","BIN_PASS","BIN_XX0","BIN_JJ", | ||||||
| @ -75,10 +94,11 @@ BIBER_FEATURES <- c( | |||||||
|   "BIN_QUES","BIN_QUOT","BIN_AT","BIN_SBJP","BIN_URL","BIN_WH", |   "BIN_QUES","BIN_QUOT","BIN_AT","BIN_SBJP","BIN_URL","BIN_WH", | ||||||
|   "BIN_INDA","BIN_ACCU","BIN_PGAS","BIN_CMADJ","BIN_SPADJ","BIN_X" |   "BIN_INDA","BIN_ACCU","BIN_PGAS","BIN_CMADJ","BIN_SPADJ","BIN_X" | ||||||
| ) | ) | ||||||
| colnames(cluster_means) <- BIBER_FEATURES | BIBER_FEATURES_NO_BIN <- gsub("^BIN_", "", BIBER_FEATURES) | ||||||
|  | colnames(cluster_means) <- BIBER_FEATURES_NO_BIN | ||||||
| library(pheatmap) | library(pheatmap) | ||||||
| pheatmap(cluster_means,  | pheatmap(cluster_means,  | ||||||
|          cluster_rows = FALSE,    # Now features are clustered (rows) |          cluster_rows = FALSE,    # Now features are clustered (rows) | ||||||
|          cluster_cols = FALSE,   # Clusters (columns) are not clustered |          cluster_cols = FALSE, | ||||||
|          scale = "row")         # Standardize features |          scale='none')         # Standardize features | ||||||
| 
 | 
 | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user