updating with dbscan clustering etc.
This commit is contained in:
		
							parent
							
								
									90e69975d2
								
							
						
					
					
						commit
						2e0665488c
					
				| @ -24,32 +24,36 @@ table(neurobiber_df$source) | ||||
| #    as.factor(kmeans(X_sub, centers = 50)$cluster) | ||||
| #  }) %>% | ||||
| #  ungroup() | ||||
| library(dbscan) | ||||
| # Density-based clustering of X; eps and minPts are the neighbourhood radius and minimum neighbourhood size. | ||||
| dbscan_result <- dbscan(X, eps = 0.5, minPts = 97) | ||||
| # dbscan::dbscan() reports noise points as cluster 0 (not -1 as in scikit-learn), | ||||
| # so relabel cluster 0 as "Noise" before converting the assignments to a factor. | ||||
| neurobiber_df$dbcluster <- as.factor(ifelse(dbscan_result$cluster == 0, "Noise", dbscan_result$cluster)) | ||||
| 
 | ||||
| kmeans_result <- kmeans(X, centers = 10) | ||||
| neurobiber_df$cluster <- as.factor(kmeans_result$cluster) | ||||
| table(neurobiber_df$cluster) | ||||
| table(neurobiber_df$dbcluster) | ||||
| 
 | ||||
| pca <- prcomp(X, center = TRUE, scale. = TRUE) | ||||
| neurobiber_df$PC1 <- pca$x[,1] | ||||
| neurobiber_df$PC2 <- pca$x[,2] | ||||
| 
 | ||||
| 
 | ||||
| ggplot(neurobiber_df, aes(x = PC1, y = PC2, color = cluster)) + | ||||
| ggplot(neurobiber_df, aes(x = PC1, y = PC2, color = dbcluster)) + | ||||
|   geom_point(size = 2, alpha = 0.7) + | ||||
|   theme_minimal() + | ||||
|   labs(title = "Within case comment clusters (kmeans) by cross-case PCA", | ||||
|   labs(title = "Across-case comment clusters (DBSCAN) by cross-case PCA", | ||||
|        x = "Principal Component 1", | ||||
|        y = "Principal Component 2") + | ||||
|   facet_wrap(~ source) | ||||
| 
 | ||||
| ggplot(neurobiber_df, aes(x = phase, y=cluster, fill=AuthorWMFAffil)) + | ||||
| ggplot(neurobiber_df, aes(x = phase, y=dbcluster, fill=AuthorWMFAffil)) + | ||||
|   geom_violin(trim = FALSE, position = position_dodge(width = 0.8), alpha = 0.6) + | ||||
|   theme_minimal() + | ||||
|   labs(title = "Across-case comment clusters by feature deployment phase", | ||||
|        x = "Feature deployment phase", | ||||
|        y = "Neurobiber feature vector cluster (kmeans)") + | ||||
|        y = "Neurobiber feature vector cluster (DBSCAN)") + | ||||
|   facet_wrap(~ source) | ||||
| 
 | ||||
| cluster_means <- aggregate(X, by = list(Cluster = neurobiber_df$cluster), FUN = mean) | ||||
| cluster_means <- aggregate(X, by = list(Cluster = neurobiber_df$dbcluster), FUN = mean) | ||||
| rownames(cluster_means) <- paste0("Cluster_", cluster_means$Cluster) | ||||
| cluster_means <- cluster_means[,-1] # Remove cluster label column | ||||
| 
 | ||||
| @ -75,6 +79,6 @@ colnames(cluster_means) <- BIBER_FEATURES | ||||
| library(pheatmap) | ||||
| pheatmap(cluster_means,  | ||||
|          cluster_rows = FALSE,    # Rows are not hierarchically clustered (kept in input order) | ||||
|          cluster_cols = TRUE,   # Columns are hierarchically clustered | ||||
|          cluster_cols = FALSE,   # Columns are not hierarchically clustered (kept in input order) | ||||
|          scale = "row")         # Center and scale values within each row | ||||
| 
 | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user