updating some of the scripts for PCA analysis
This commit is contained in:
parent
f146016eac
commit
b198781aa0
@ -59,6 +59,7 @@ neurobiber_subcomment_pca_df$comment_type <- "subcomment"
|
|||||||
#clean the messages
|
#clean the messages
|
||||||
neurobiber_description_pca_df$cleaned_comment <- sapply(neurobiber_description_pca_df$text, preprocess_comment)
|
neurobiber_description_pca_df$cleaned_comment <- sapply(neurobiber_description_pca_df$text, preprocess_comment)
|
||||||
neurobiber_subcomment_pca_df$cleaned_comment <- sapply(neurobiber_subcomment_pca_df$text, preprocess_comment)
|
neurobiber_subcomment_pca_df$cleaned_comment <- sapply(neurobiber_subcomment_pca_df$text, preprocess_comment)
|
||||||
|
total_joined$cleaned_comment <- sapply(total_joined$text, preprocess_comment)
|
||||||
|
|
||||||
subcomment_joined <- subcomment_joined %>%
|
subcomment_joined <- subcomment_joined %>%
|
||||||
mutate(pair_in_description = (paste(AuthorPHID, TaskPHID) %in%
|
mutate(pair_in_description = (paste(AuthorPHID, TaskPHID) %in%
|
||||||
@ -77,13 +78,13 @@ discussion_anova_results <- neurobiber_subcomment_pca_df %>%
|
|||||||
discussion_anova_results
|
discussion_anova_results
|
||||||
|
|
||||||
# look at the representative comments for PC1 and PC2
|
# look at the representative comments for PC1 and PC2
|
||||||
top5 <- neurobiber_description_pca_df %>%
|
top5 <- total_joined %>%
|
||||||
arrange(desc(PC2)) %>%
|
arrange(desc(PC4)) %>%
|
||||||
slice(300:310) %>%
|
slice(300:310) %>%
|
||||||
pull(cleaned_comment)
|
pull(cleaned_comment)
|
||||||
|
|
||||||
bottom5 <- neurobiber_description_pca_df %>%
|
bottom5 <- total_joined %>%
|
||||||
arrange(PC2) %>%
|
arrange(PC4) %>%
|
||||||
slice(300:310) %>%
|
slice(300:310) %>%
|
||||||
pull(cleaned_comment)
|
pull(cleaned_comment)
|
||||||
|
|
||||||
@ -130,18 +131,17 @@ description_sampled_authors <- description_joined %>%
|
|||||||
description_sub_sample <- description_joined %>%
|
description_sub_sample <- description_joined %>%
|
||||||
filter(AuthorPHID %in% description_sampled_authors)
|
filter(AuthorPHID %in% description_sampled_authors)
|
||||||
|
|
||||||
ggplot(description_sub_sample, aes(x = PC2, y = PC1, fill = AuthorPHID)) +
|
ggplot(total_joined, aes(x = PC4, y = PC3, fill = comment_type)) +
|
||||||
facet_grid(source~phase, scales="fixed") +
|
facet_grid(source~phase, scales="fixed") +
|
||||||
geom_point(shape = 21, alpha=0.3, size=2) +
|
geom_point(shape = 21, alpha=0.3, size=2) +
|
||||||
xlim(-30, 30) +
|
xlim(-30, 30) +
|
||||||
ylim(-30, 30) +
|
ylim(-30, 30) +
|
||||||
scale_fill_brewer(palette = "Set1") +
|
scale_fill_viridis_d() +
|
||||||
theme_minimal() +
|
theme_minimal() +
|
||||||
guides(fill = "none") +
|
|
||||||
labs(
|
labs(
|
||||||
title = "PCs for Task Comments (Faceted by source and phase)",
|
title = "PCs for Task Comments (Faceted by source and phase)",
|
||||||
x = "PC2",
|
x = "PC4",
|
||||||
y = "PC1",
|
y = "PC3",
|
||||||
)
|
)
|
||||||
|
|
||||||
priority_order <- c("Unbreak Now!", "High", "Medium", "Low", "Lowest", "Needs Triage")
|
priority_order <- c("Unbreak Now!", "High", "Medium", "Low", "Lowest", "Needs Triage")
|
||||||
@ -153,18 +153,18 @@ description_joined <- description_joined %>%
|
|||||||
mutate(priority = factor(priority.y, levels = priority_order))
|
mutate(priority = factor(priority.y, levels = priority_order))
|
||||||
|
|
||||||
ggplot(total_joined, aes(
|
ggplot(total_joined, aes(
|
||||||
x = PC1, # x-axis grouping
|
x = as.factor(comment_type), # x-axis grouping
|
||||||
y = PC2,
|
y = PC3,
|
||||||
fill = comment_type
|
fill = isAuthorWMF
|
||||||
)) +
|
)) +
|
||||||
ylim(-20, 20) +
|
ylim(-30, 30) +
|
||||||
geom_boxplot(alpha = 0.7, position = position_dodge(width = 0.9)) +
|
geom_boxplot(alpha = 0.7, position = position_dodge(width = 0.9)) +
|
||||||
facet_grid(. ~ source, scales = "fixed") + # Facet by source; adjust as needed
|
facet_grid(. ~ source, scales = "fixed") + # Facet by source; adjust as needed
|
||||||
scale_fill_viridis_d() +
|
scale_fill_viridis_d() +
|
||||||
theme_minimal() +
|
theme_minimal() +
|
||||||
labs(
|
labs(
|
||||||
title = "Boxplot of PC2 for Task Descriptions",
|
title = "Boxplot of PC4",
|
||||||
x = "Task priority",
|
x = "Comment_type",
|
||||||
y = "PC2",
|
y = "PC4",
|
||||||
fill = "isAuthorWMF?"
|
fill = "isAuthorWMF?"
|
||||||
)
|
)
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user