# mw-lifecycle-analysis/p2/quest/neurobiber_PCA_analysis.R

library(tidyverse)

# Locations of the two input CSV files (description-level and
# sub-comment-level tables); both are read with a header row.
neurobiber_description_pca_csv <- "~/p2/quest/090425_description_PCA_df.csv"
neurobiber_subcomment_pca_csv <- "~/p2/quest/090425_subcomment_PCA_df.csv"

neurobiber_description_pca_df <- read.csv(
  file = neurobiber_description_pca_csv,
  header = TRUE
)
neurobiber_subcomment_pca_df <- read.csv(
  file = neurobiber_subcomment_pca_csv,
  header = TRUE
)


# One-way ANOVA of PC2 on `phase`, fitted separately within each `source`
# group. Each result below is a list holding one `summary(aov(...))` per
# group; only PC2 is tested here.
library(dplyr)

description_anova_results <- neurobiber_description_pca_df |>
  group_by(source) |>
  group_map(
    function(group_rows, group_key) {
      summary(aov(PC2 ~ phase, data = group_rows))
    },
    .keep = TRUE
  )
description_anova_results

# Same per-source ANOVA, run on the sub-comment table.
discussion_anova_results <- neurobiber_subcomment_pca_df |>
  group_by(source) |>
  group_map(
    function(group_rows, group_key) {
      summary(aov(PC2 ~ phase, data = group_rows))
    },
    .keep = TRUE
  )
discussion_anova_results

# Pull example comment texts from the sub-comment table for source "c2",
# ordered by PC2.
# NOTE: despite the `top5` / `bottom5` names, each vector holds rows 15
# through 30 of the respective ordering (see `slice(15:30)`).

# Rows 15-30 when sorted by PC2 from highest to lowest.
top5 <- neurobiber_subcomment_pca_df |>
  filter(source == "c2") |>
  arrange(desc(PC2)) |>
  slice(15:30) |>
  pull(text)

# Rows 15-30 when sorted by PC2 from lowest to highest.
bottom5 <- neurobiber_subcomment_pca_df |>
  filter(source == "c2") |>
  arrange(PC2) |>
  slice(15:30) |>
  pull(text)

cat("Top 15:30 comment_text by score:\n")
print(top5)

cat("\nBottom 15:30 comment_text by score:\n")
print(bottom5)


# Aggregate the description table to one row per combination of
# AuthorWMFAffil, week_index, source and priority, holding the median of
# each of PC1..PC5 within that cell.
#
# NOTE: the output columns are named `mean_PC*` but contain medians. The
# names are kept as-is because later code selects these columns by the
# "mean_PC" prefix.
#
# `.groups = "drop"` returns an ungrouped table, so no grouping silently
# carries over into later steps and summarise() does not emit its
# regrouping message.
aggregated_neurobiber_description_pca_df <- neurobiber_description_pca_df |>
  group_by(AuthorWMFAffil, week_index, source, priority) |>
  summarise(
    across(c(PC1, PC2, PC3, PC4, PC5), median, .names = "mean_{.col}"),
    .groups = "drop"
  )
library(scales)
library(ggplot2)


# Named colour vector keyed by the two affiliation labels ("False" / "True");
# used as the `values` of a manual colour scale.
affiliationColors <- c(
  False = "#5da2d8",
  True = "#c7756a"
)


# Reshape the aggregated table to long format: every column whose name
# starts with "mean_PC" becomes a (PC, PC_value) pair of rows, with the
# original column name stored in `PC`.
long_df <- tidyr::pivot_longer(
  data = aggregated_neurobiber_description_pca_df,
  cols = starts_with("mean_PC"),
  names_to = "PC",
  values_to = "PC_value"
)

# Line plot of the aggregated PC values over week_index, one line per
# AuthorWMFAffil value, faceted with PCs in rows and sources in columns.
#
# The plotted values are medians (computed with median() upstream), so the
# y-axis label says "Median" to agree with the title.
#
# NOTE(review): `long_df` is derived from an aggregation that also groups by
# `priority`, so there may be several rows per affiliation/week/source/PC;
# with `group = AuthorWMFAffil` those would be joined into a single line.
# Confirm whether priority should be aggregated away or mapped to an
# aesthetic/facet.
# NOTE(review): fixed y limits of [-10, 10] apply to every panel, which
# overrides the effect of `scales = "free_y"`, and values outside the limits
# are dropped rather than clipped. Confirm this is intended.
ggplot(
  long_df,
  aes(
    x = week_index,
    y = PC_value,
    color = AuthorWMFAffil,
    group = AuthorWMFAffil
  )
) +
  # `linewidth` replaces the deprecated `size` aesthetic for lines.
  geom_line(linewidth = 1) +
  facet_grid(PC ~ source, scales = "free_y") +
  scale_color_manual(values = affiliationColors, name = "WMF Affiliation") +
  scale_x_continuous(breaks = pretty_breaks()) +
  scale_y_continuous(limits = c(-10, 10)) +
  labs(
    x = "Week Index",
    y = "Median PC Value",
    title = "Weekly Median PC Values by Source and PC, Colored by WMF Affiliation"
  ) +
  theme_minimal(base_size = 14) +
  theme(legend.position = "top")