1
0

updating some of the scripts for PCA analysis

This commit is contained in:
Matthew Gaughan 2025-10-20 11:09:04 -07:00
parent f146016eac
commit b198781aa0

View File

@ -59,6 +59,7 @@ neurobiber_subcomment_pca_df$comment_type <- "subcomment"
# clean the messages
# Apply preprocess_comment to every element of each data frame's `text` column
# and store the result in a new `cleaned_comment` column.
# NOTE(review): sapply()'s return type depends on what preprocess_comment
# returns (it is defined outside this view). If it always yields one string,
# vapply(..., character(1)) would make that contract explicit — confirm.
neurobiber_description_pca_df$cleaned_comment <- sapply(neurobiber_description_pca_df$text, preprocess_comment)
neurobiber_subcomment_pca_df$cleaned_comment <- sapply(neurobiber_subcomment_pca_df$text, preprocess_comment)
total_joined$cleaned_comment <- sapply(total_joined$text, preprocess_comment)
subcomment_joined <- subcomment_joined %>%
mutate(pair_in_description = (paste(AuthorPHID, TaskPHID) %in%
@ -77,13 +78,13 @@ discussion_anova_results <- neurobiber_subcomment_pca_df %>%
discussion_anova_results
# Look at representative comments along one principal component: order rows by
# the component score (descending for top5, ascending for bottom5), take rows
# 300 through 310, and pull their cleaned_comment text.
# NOTE(review): each assignment below appears twice with no diff markers — one
# variant reads neurobiber_description_pca_df ordered by PC2, the other reads
# total_joined ordered by PC4. As written the span does not parse; confirm
# which variant is live before running it.
# NOTE(review): slice(300:310) returns 11 rows starting at rank 300, not the
# five most extreme rows the top5/bottom5 names suggest — confirm intent.
top5 <- neurobiber_description_pca_df %>%
arrange(desc(PC2)) %>%
top5 <- total_joined %>%
arrange(desc(PC4)) %>%
slice(300:310) %>%
pull(cleaned_comment)
bottom5 <- neurobiber_description_pca_df %>%
arrange(PC2) %>%
bottom5 <- total_joined %>%
arrange(PC4) %>%
slice(300:310) %>%
pull(cleaned_comment)
@ -130,18 +131,17 @@ description_sampled_authors <- description_joined %>%
# Keep only the rows of description_joined whose AuthorPHID is in
# description_sampled_authors.
description_sub_sample <- description_joined %>%
filter(AuthorPHID %in% description_sampled_authors)
# Scatter plot of two principal-component scores, faceted by source (rows) and
# phase (columns) on fixed scales, with the fill legend hidden.
# NOTE(review): two variants are interleaved below with no diff markers — a
# PC2/PC1 plot of description_sub_sample filled by AuthorPHID, and a PC4/PC3
# plot of total_joined filled by comment_type. The same applies to the two
# fill scales and the two pairs of axis labels; confirm which set is live.
# NOTE(review): xlim()/ylim() remove points outside [-30, 30] rather than
# zooming; use coord_cartesian() if out-of-range points should be kept.
ggplot(description_sub_sample, aes(x = PC2, y = PC1, fill = AuthorPHID)) +
ggplot(total_joined, aes(x = PC4, y = PC3, fill = comment_type)) +
facet_grid(source~phase, scales="fixed") +
geom_point(shape = 21, alpha=0.3, size=2) +
xlim(-30, 30) +
ylim(-30, 30) +
scale_fill_brewer(palette = "Set1") +
scale_fill_viridis_d() +
theme_minimal() +
guides(fill = "none") +
labs(
title = "PCs for Task Comments (Faceted by source and phase)",
x = "PC2",
y = "PC1",
x = "PC4",
y = "PC3",
)
# Priority labels in the order they are passed as factor levels when the
# `priority` column is built from priority.y.
priority_order <- c("Unbreak Now!", "High", "Medium", "Low", "Lowest", "Needs Triage")
@ -153,18 +153,18 @@ description_joined <- description_joined %>%
mutate(priority = factor(priority.y, levels = priority_order))
# Box plot of a principal-component score from total_joined, with boxes dodged
# by the fill group and one facet column per source on fixed scales.
# NOTE(review): two variants are interleaved below with no diff markers — one
# maps x = PC1, y = PC2, fill = comment_type with ylim(-20, 20); the other maps
# x = as.factor(comment_type), y = PC3, fill = isAuthorWMF with ylim(-30, 30).
# The title and axis labels are duplicated the same way. As written the aes()
# call does not parse; confirm which variant is live.
# NOTE(review): in the comment_type/isAuthorWMF variant the y aesthetic is PC3
# but the title and y label both say "PC4" — make the labels match the plotted
# column (or switch the aesthetic to PC4 if that was the intent).
# NOTE(review): ylim() discards observations outside the range before the
# boxplot statistics are computed, which changes medians and whiskers;
# coord_cartesian(ylim = ...) zooms without dropping data — confirm which is
# wanted.
ggplot(total_joined, aes(
x = PC1, # x-axis grouping
y = PC2,
fill = comment_type
x = as.factor(comment_type), # x-axis grouping
y = PC3,
fill = isAuthorWMF
)) +
ylim(-20, 20) +
ylim(-30, 30) +
geom_boxplot(alpha = 0.7, position = position_dodge(width = 0.9)) +
facet_grid(. ~ source, scales = "fixed") + # Facet by source; adjust as needed
scale_fill_viridis_d() +
theme_minimal() +
labs(
title = "Boxplot of PC2 for Task Descriptions",
x = "Task priority",
y = "PC2",
title = "Boxplot of PC4",
x = "Comment_type",
y = "PC4",
fill = "isAuthorWMF?"
)