adding in analysis of biberplus vectors
This commit is contained in:
parent
b0584ec1be
commit
a08a49d04e
@ -1,15 +1,16 @@
|
|||||||
library(tidyverse)
|
library(tidyverse)
|
||||||
|
|
||||||
neurobiber_csv <-"~/p2/quest/071525_neurobiber_labels.csv"
|
neurobiber_csv <-"~/p2/quest/072325_biberplus_labels.csv"
|
||||||
neurobiber_df <- read.csv(neurobiber_csv , header = TRUE)
|
neurobiber_df <- read.csv(neurobiber_csv , header = TRUE)
|
||||||
|
|
||||||
neurobiber_df$features_vec <- lapply(neurobiber_df$neurobiber_preds, function(x) {
|
normalized_cols <- grep("^normalized_", names(neurobiber_df), value = TRUE)
|
||||||
x <- gsub("\\[|\\]", "", x)
|
|
||||||
x <- trimws(x)
|
neurobiber_df$normalized_features_vec <- lapply(
|
||||||
as.numeric(unlist(strsplit(x, "\\s+")))
|
asplit(neurobiber_df[, normalized_cols], 1), as.numeric
|
||||||
})
|
)
|
||||||
|
|
||||||
|
X <- do.call(rbind, neurobiber_df$normalized_features_vec)
|
||||||
|
|
||||||
X <- do.call(rbind, neurobiber_df$features_vec )
|
|
||||||
|
|
||||||
set.seed(808)
|
set.seed(808)
|
||||||
|
|
||||||
@ -37,7 +38,7 @@ neurobiber_df$PC1 <- pca$x[,1]
|
|||||||
neurobiber_df$PC2 <- pca$x[,2]
|
neurobiber_df$PC2 <- pca$x[,2]
|
||||||
|
|
||||||
|
|
||||||
ggplot(neurobiber_df, aes(x = PC1, y = PC2, color = dbcluster)) +
|
ggplot(neurobiber_df, aes(x = PC1, y = PC2, color = phase)) +
|
||||||
geom_point(size = 2, alpha = 0.7) +
|
geom_point(size = 2, alpha = 0.7) +
|
||||||
theme_minimal() +
|
theme_minimal() +
|
||||||
labs(title = "Across-case comment clusters (DBSCAN) by cross-case PCA",
|
labs(title = "Across-case comment clusters (DBSCAN) by cross-case PCA",
|
||||||
@ -53,9 +54,27 @@ ggplot(neurobiber_df, aes(x = phase, y=dbcluster, fill=AuthorWMFAffil)) +
|
|||||||
y = "Neurobiber feature vector cluster (DBSCAN)") +
|
y = "Neurobiber feature vector cluster (DBSCAN)") +
|
||||||
facet_wrap(~ source)
|
facet_wrap(~ source)
|
||||||
|
|
||||||
cluster_means <- aggregate(X, by = list(Cluster = neurobiber_df$dbcluster), FUN = mean)
|
cluster_means <- aggregate(
|
||||||
rownames(cluster_means) <- paste0("Cluster_", cluster_means$Cluster)
|
X,
|
||||||
cluster_means <- cluster_means[,-1] # Remove cluster label column
|
by = list(
|
||||||
|
WMFAffil = neurobiber_df$AuthorWMFAffil,
|
||||||
|
phase = neurobiber_df$phase,
|
||||||
|
comment_type = neurobiber_df$comment_type,
|
||||||
|
source= neurobiber_df$source
|
||||||
|
),
|
||||||
|
FUN = mean
|
||||||
|
)
|
||||||
|
|
||||||
|
rownames(cluster_means) <- apply(
|
||||||
|
cluster_means[, c("WMFAffil", "phase", "comment_type", "source")], 1,
|
||||||
|
function(x) paste(x, collapse = "_")
|
||||||
|
)
|
||||||
|
|
||||||
|
cluster_means <- cluster_means[, !(names(cluster_means) %in% c("WMFAffil", "phase", "comment_type", "source"))]
|
||||||
|
|
||||||
|
#cluster_means <- aggregate(X, by = list(Cluster = neurobiber_df$AuthorWMFAffil), FUN = mean)
|
||||||
|
#rownames(cluster_means) <- paste0("Cluster_", cluster_means$Cluster)
|
||||||
|
#cluster_means <- cluster_means[,-1] # Remove cluster label column
|
||||||
|
|
||||||
BIBER_FEATURES <- c(
|
BIBER_FEATURES <- c(
|
||||||
"BIN_QUAN","BIN_QUPR","BIN_AMP","BIN_PASS","BIN_XX0","BIN_JJ",
|
"BIN_QUAN","BIN_QUPR","BIN_AMP","BIN_PASS","BIN_XX0","BIN_JJ",
|
||||||
@ -75,10 +94,11 @@ BIBER_FEATURES <- c(
|
|||||||
"BIN_QUES","BIN_QUOT","BIN_AT","BIN_SBJP","BIN_URL","BIN_WH",
|
"BIN_QUES","BIN_QUOT","BIN_AT","BIN_SBJP","BIN_URL","BIN_WH",
|
||||||
"BIN_INDA","BIN_ACCU","BIN_PGAS","BIN_CMADJ","BIN_SPADJ","BIN_X"
|
"BIN_INDA","BIN_ACCU","BIN_PGAS","BIN_CMADJ","BIN_SPADJ","BIN_X"
|
||||||
)
|
)
|
||||||
colnames(cluster_means) <- BIBER_FEATURES
|
BIBER_FEATURES_NO_BIN <- gsub("^BIN_", "", BIBER_FEATURES)
|
||||||
|
colnames(cluster_means) <- BIBER_FEATURES_NO_BIN
|
||||||
library(pheatmap)
|
library(pheatmap)
|
||||||
pheatmap(cluster_means,
|
pheatmap(cluster_means,
|
||||||
cluster_rows = FALSE, # Rows (groups) are kept in their given order, not clustered
|
cluster_rows = FALSE, # Rows (groups) are kept in their given order, not clustered
|
||||||
cluster_cols = FALSE, # Columns (features) are not clustered
|
cluster_cols = FALSE,
|
||||||
scale = "row") # Standardize features
|
scale='none') # No scaling: plot the raw group means
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user