Update some of the scripts for the PCA
This commit is contained in:
parent
f146016eac
commit
b198781aa0
@ -59,6 +59,7 @@ neurobiber_subcomment_pca_df$comment_type <- "subcomment"
|
||||
# clean the comment text with preprocess_comment
|
||||
neurobiber_description_pca_df$cleaned_comment <- sapply(neurobiber_description_pca_df$text, preprocess_comment)
|
||||
neurobiber_subcomment_pca_df$cleaned_comment <- sapply(neurobiber_subcomment_pca_df$text, preprocess_comment)
|
||||
total_joined$cleaned_comment <- sapply(total_joined$text, preprocess_comment)
|
||||
|
||||
subcomment_joined <- subcomment_joined %>%
|
||||
mutate(pair_in_description = (paste(AuthorPHID, TaskPHID) %in%
|
||||
@ -77,13 +78,13 @@ discussion_anova_results <- neurobiber_subcomment_pca_df %>%
|
||||
discussion_anova_results
|
||||
|
||||
# look at representative comments from the high and low ends of one principal component
|
||||
top5 <- neurobiber_description_pca_df %>%
|
||||
arrange(desc(PC2)) %>%
|
||||
top5 <- total_joined %>%
|
||||
arrange(desc(PC4)) %>%
|
||||
slice(300:310) %>%
|
||||
pull(cleaned_comment)
|
||||
|
||||
bottom5 <- neurobiber_description_pca_df %>%
|
||||
arrange(PC2) %>%
|
||||
bottom5 <- total_joined %>%
|
||||
arrange(PC4) %>%
|
||||
slice(300:310) %>%
|
||||
pull(cleaned_comment)
|
||||
|
||||
@ -130,18 +131,17 @@ description_sampled_authors <- description_joined %>%
|
||||
description_sub_sample <- description_joined %>%
|
||||
filter(AuthorPHID %in% description_sampled_authors)
|
||||
|
||||
ggplot(description_sub_sample, aes(x = PC2, y = PC1, fill = AuthorPHID)) +
|
||||
ggplot(total_joined, aes(x = PC4, y = PC3, fill = comment_type)) +
|
||||
facet_grid(source~phase, scales="fixed") +
|
||||
geom_point(shape = 21, alpha=0.3, size=2) +
|
||||
xlim(-30, 30) +
|
||||
ylim(-30, 30) +
|
||||
scale_fill_brewer(palette = "Set1") +
|
||||
scale_fill_viridis_d() +
|
||||
theme_minimal() +
|
||||
guides(fill = "none") +
|
||||
labs(
|
||||
title = "PCs for Task Comments (Faceted by source and phase)",
|
||||
x = "PC2",
|
||||
y = "PC1",
|
||||
x = "PC4",
|
||||
y = "PC3",
|
||||
)
|
||||
|
||||
priority_order <- c("Unbreak Now!", "High", "Medium", "Low", "Lowest", "Needs Triage")
|
||||
@ -153,18 +153,18 @@ description_joined <- description_joined %>%
|
||||
mutate(priority = factor(priority.y, levels = priority_order))
|
||||
|
||||
ggplot(total_joined, aes(
|
||||
x = PC1, # x-axis grouping
|
||||
y = PC2,
|
||||
fill = comment_type
|
||||
x = as.factor(comment_type), # x-axis grouping
|
||||
y = PC3,
|
||||
fill = isAuthorWMF
|
||||
)) +
|
||||
ylim(-20, 20) +
|
||||
ylim(-30, 30) +
|
||||
geom_boxplot(alpha = 0.7, position = position_dodge(width = 0.9)) +
|
||||
facet_grid(. ~ source, scales = "fixed") + # Facet by source; adjust as needed
|
||||
scale_fill_viridis_d() +
|
||||
theme_minimal() +
|
||||
labs(
|
||||
title = "Boxplot of PC2 for Task Descriptions",
|
||||
x = "Task priority",
|
||||
y = "PC2",
|
||||
title = "Boxplot of PC4",
|
||||
x = "Comment_type",
|
||||
y = "PC4",
|
||||
fill = "isAuthorWMF?"
|
||||
)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user