updating with dbscan clustering etc.
This commit is contained in:
parent
90e69975d2
commit
2e0665488c
@ -24,32 +24,36 @@ table(neurobiber_df$source)
|
||||
# as.factor(kmeans(X_sub, centers = 50)$cluster)
|
||||
# }) %>%
|
||||
# ungroup()
|
||||
library(dbscan)
|
||||
dbscan_result <- dbscan(X, eps = 0.5, minPts = 97)
|
||||
# dbscan::dbscan() assigns cluster id 0 to noise points (not -1, which is the
# scikit-learn convention), so relabel 0 as "Noise" and keep the other ids.
neurobiber_df$dbcluster <- as.factor(
  ifelse(dbscan_result$cluster == 0, "Noise", dbscan_result$cluster)
)
|
||||
|
||||
kmeans_result <- kmeans(X, centers = 10)
|
||||
neurobiber_df$cluster <- as.factor(kmeans_result$cluster)
|
||||
table(neurobiber_df$cluster)
|
||||
table(neurobiber_df$dbcluster)
|
||||
|
||||
pca <- prcomp(X, center = TRUE, scale. = TRUE)
|
||||
neurobiber_df$PC1 <- pca$x[,1]
|
||||
neurobiber_df$PC2 <- pca$x[,2]
|
||||
|
||||
|
||||
ggplot(neurobiber_df, aes(x = PC1, y = PC2, color = cluster)) +
|
||||
ggplot(neurobiber_df, aes(x = PC1, y = PC2, color = dbcluster)) +
|
||||
geom_point(size = 2, alpha = 0.7) +
|
||||
theme_minimal() +
|
||||
labs(title = "Within case comment clusters (kmeans) by cross-case PCA",
|
||||
labs(title = "Across-case comment clusters (DBSCAN) by cross-case PCA",
|
||||
x = "Principal Component 1",
|
||||
y = "Principal Component 2") +
|
||||
facet_wrap(~ source)
|
||||
|
||||
ggplot(neurobiber_df, aes(x = phase, y=cluster, fill=AuthorWMFAffil)) +
|
||||
ggplot(neurobiber_df, aes(x = phase, y=dbcluster, fill=AuthorWMFAffil)) +
|
||||
geom_violin(trim = FALSE, position = position_dodge(width = 0.8), alpha = 0.6) +
|
||||
theme_minimal() +
|
||||
labs(title = "Across-case comment clusters by feature deployment phase",
|
||||
x = "Feature deployment phase",
|
||||
y = "Neurobiber feature vector cluster (kmeans)") +
|
||||
y = "Neurobiber feature vector cluster (DBSCAN)") +
|
||||
facet_wrap(~ source)
|
||||
|
||||
cluster_means <- aggregate(X, by = list(Cluster = neurobiber_df$cluster), FUN = mean)
|
||||
cluster_means <- aggregate(X, by = list(Cluster = neurobiber_df$dbcluster), FUN = mean)
|
||||
rownames(cluster_means) <- paste0("Cluster_", cluster_means$Cluster)
|
||||
cluster_means <- cluster_means[,-1] # Remove cluster label column
|
||||
|
||||
@ -75,6 +79,6 @@ colnames(cluster_means) <- BIBER_FEATURES
|
||||
library(pheatmap)
|
||||
pheatmap(cluster_means,
|
||||
cluster_rows = FALSE, # Rows are not clustered (row order is kept as given)
|
||||
cluster_cols = TRUE, # Clusters (columns) are not clustered
|
||||
cluster_cols = FALSE, # Clusters (columns) are not clustered
|
||||
scale = "row") # Standardize features
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user