1
0
mw-lifecycle-analysis/dsl/dsl_pp_power.R
2025-09-07 13:21:45 -07:00

24 lines
765 B
R

library(tidyverse)
library(stringr)
library(tidyr)
library(dplyr)
library(purrr)
# TODO
# join the label data with the existing data from 0714 master
# download and set up DSL library
# figure out how to use the sentence-level variables
# get the categorical variables encoded as integers, then wrapped as factors
# figure out power at 200, 400, 500, 750, and 1000
# Join the sentence categorizations with their AuthorPHID from the master data
# Locations of the two input CSV files.
olmo_categorization_csv <- "~/dsl/inter_090725_sent_cats.csv"
main_csv <- "~/p2/071425_master_discussion_data.csv"

# Load both tables; the first row of each file supplies the column names.
sl_olmo_categorization_df <- read.csv(olmo_categorization_csv, header = TRUE)
main_df <- read.csv(main_csv, header = TRUE)

# Keep every categorization row and attach the matching AuthorPHID from the
# master table, keyed on `id`. Rows with no match in main_df get NA for
# AuthorPHID; only `id` and `AuthorPHID` are taken from main_df.
joined_df <- sl_olmo_categorization_df %>%
  left_join(select(main_df, id, AuthorPHID), by = "id")