# Exploratory analysis of the Neurobiber PCA data frames read from the two
# CSVs below (one for descriptions, one for sub-comments):
#   1. per-source one-way ANOVA of PC2 on phase,
#   2. a look at comments ranked 15-30 at either end of PC2 for source "c2",
#   3. weekly median PC1-PC5 values plotted by source and WMF affiliation.

library(tidyverse)
library(dplyr)
library(scales)
library(ggplot2)

# Load data ----

neurobiber_description_pca_csv <- "~/p2/quest/090425_description_PCA_df.csv"
neurobiber_description_pca_df <- read.csv(
  neurobiber_description_pca_csv,
  header = TRUE
)

neurobiber_subcomment_pca_csv <- "~/p2/quest/090425_subcomment_PCA_df.csv"
neurobiber_subcomment_pca_df <- read.csv(
  neurobiber_subcomment_pca_csv,
  header = TRUE
)

# Helpers ----

# Fit `aov(PC2 ~ phase)` separately for every `source` in `pca_df` and return
# the list of ANOVA summaries, named by source so that each printed table can
# be attributed to its group.
anova_by_source <- function(pca_df) {
  by_source <- pca_df |>
    group_by(source)

  summaries <- by_source |>
    group_map(
      \(group_df, group_key) summary(aov(PC2 ~ phase, data = group_df)),
      .keep = TRUE
    )

  # group_keys() lists the groups in the same order group_map() visits them.
  setNames(summaries, as.character(group_keys(by_source)$source))
}

# read.csv() parses a column made up only of "True"/"False" strings as
# logical, whose values would not match the "False"/"True" names of the colour
# palette below. Map logical values back to those labels; leave any other
# column type untouched.
affiliation_label <- function(x) {
  if (is.logical(x)) {
    ifelse(x, "True", "False")
  } else {
    x
  }
}

# One-way ANOVA of PC2 across phases, per source ----

description_anova_results <- anova_by_source(neurobiber_description_pca_df)
description_anova_results

discussion_anova_results <- anova_by_source(neurobiber_subcomment_pca_df)
discussion_anova_results

# Representative comments along PC2 (source "c2") ----

# NOTE: despite their names, `top5` / `bottom5` hold the comments ranked 15-30
# from the high and low end of PC2 respectively (see the printed headers).
top5 <- neurobiber_subcomment_pca_df |>
  filter(source == "c2") |>
  arrange(desc(PC2)) |>
  slice(15:30) |>
  pull(text)

bottom5 <- neurobiber_subcomment_pca_df |>
  filter(source == "c2") |>
  arrange(PC2) |>
  slice(15:30) |>
  pull(text)

cat("Top 15:30 comment_text by score:\n")
print(top5)
cat("\nBottom 15:30 comment_text by score:\n")
print(bottom5)

# Weekly median PC values ----

# The `mean_PC*` columns hold MEDIANS; the names are kept unchanged so that
# anything relying on them keeps working.
aggregated_neurobiber_description_pca_df <- neurobiber_description_pca_df |>
  group_by(AuthorWMFAffil, week_index, source, priority) |>
  summarise(
    mean_PC1 = median(PC1),
    mean_PC2 = median(PC2),
    mean_PC3 = median(PC3),
    mean_PC4 = median(PC4),
    mean_PC5 = median(PC5),
    .groups = "drop"
  )

affiliationColors <- c("False" = "#5da2d8", "True" = "#c7756a")

long_df <- aggregated_neurobiber_description_pca_df |>
  tidyr::pivot_longer(
    cols = starts_with("mean_PC"),
    names_to = "PC",
    values_to = "PC_value"
  )

# Plot-only copy so that `long_df` keeps the column types it was read with.
plot_df <- long_df |>
  mutate(AuthorWMFAffil = affiliation_label(AuthorWMFAffil))

# NOTE: the fixed y limits apply to every panel, so `scales = "free_y"` has no
# visible effect, and values outside [-10, 10] are dropped from the lines.
ggplot(
  plot_df,
  aes(
    x = week_index,
    y = PC_value,
    color = AuthorWMFAffil,
    group = AuthorWMFAffil
  )
) +
  geom_line(linewidth = 1) +
  facet_grid(PC ~ source, scales = "free_y") +
  scale_color_manual(values = affiliationColors, name = "WMF Affiliation") +
  scale_x_continuous(breaks = pretty_breaks()) +
  scale_y_continuous(limits = c(-10, 10)) +
  labs(
    x = "Week Index",
    y = "Median PC Value",
    title = "Weekly Median PC Values by Source and PC, Colored by WMF Affiliation"
  ) +
  theme_minimal(base_size = 14) +
  theme(legend.position = "top")