diff --git a/p2/quest/neurobiber_PCA_analysis.R b/p2/quest/neurobiber_PCA_analysis.R
index 40f3296..c5fa668 100644
--- a/p2/quest/neurobiber_PCA_analysis.R
+++ b/p2/quest/neurobiber_PCA_analysis.R
@@ -59,6 +59,7 @@ neurobiber_subcomment_pca_df$comment_type <- "subcomment"
 #clean the messages
 neurobiber_description_pca_df$cleaned_comment <- sapply(neurobiber_description_pca_df$text, preprocess_comment)
 neurobiber_subcomment_pca_df$cleaned_comment <- sapply(neurobiber_subcomment_pca_df$text, preprocess_comment)
+total_joined$cleaned_comment <- sapply(total_joined$text, preprocess_comment)
 
 subcomment_joined <- subcomment_joined %>%
   mutate(pair_in_description = (paste(AuthorPHID, TaskPHID) %in%
@@ -77,13 +78,13 @@ discussion_anova_results <- neurobiber_subcomment_pca_df %>%
 discussion_anova_results
 
-# look at the representative comments for PC1 and PC2
-top5 <- neurobiber_description_pca_df %>%
-  arrange(desc(PC2)) %>%
+# look at the representative comments for PC4
+top5 <- total_joined %>%
+  arrange(desc(PC4)) %>%
   slice(300:310) %>%
   pull(cleaned_comment)
 
-bottom5 <- neurobiber_description_pca_df %>%
-  arrange(PC2) %>%
+bottom5 <- total_joined %>%
+  arrange(PC4) %>%
   slice(300:310) %>%
   pull(cleaned_comment)
 
@@ -130,18 +131,17 @@ description_sampled_authors <- description_joined %>%
 description_sub_sample <- description_joined %>%
   filter(AuthorPHID %in% description_sampled_authors)
 
-ggplot(description_sub_sample, aes(x = PC2, y = PC1, fill = AuthorPHID)) +
+ggplot(total_joined, aes(x = PC4, y = PC3, fill = comment_type)) +
   facet_grid(source~phase, scales="fixed") +
   geom_point(shape = 21, alpha=0.3, size=2) +
   xlim(-30, 30) +
   ylim(-30, 30) +
-  scale_fill_brewer(palette = "Set1") +
+  scale_fill_viridis_d() +
   theme_minimal() +
-  guides(fill = "none") +
   labs(
     title = "PCs for Task Comments (Faceted by source and phase)",
-    x = "PC2",
-    y = "PC1",
+    x = "PC4",
+    y = "PC3",
   )
 
 priority_order <- c("Unbreak Now!", "High", "Medium", "Low", "Lowest", "Needs Triage")
@@ -153,18 +153,18 @@ description_joined <- description_joined %>%
   mutate(priority = factor(priority.y, levels = priority_order))
 
 ggplot(total_joined, aes(
-  x = PC1, # x-axis grouping
-  y = PC2,
-  fill = comment_type
+  x = as.factor(comment_type), # x-axis grouping
+  y = PC3,
+  fill = isAuthorWMF
 )) +
-  ylim(-20, 20) +
+  ylim(-30, 30) +
   geom_boxplot(alpha = 0.7, position = position_dodge(width = 0.9)) +
   facet_grid(. ~ source, scales = "fixed") + # Facet by source; adjust as needed
   scale_fill_viridis_d() +
   theme_minimal() +
   labs(
-    title = "Boxplot of PC2 for Task Descriptions",
-    x = "Task priority",
-    y = "PC2",
+    title = "Boxplot of PC3",
+    x = "Comment_type",
+    y = "PC3",
     fill = "isAuthorWMF?"
   )