library(tidyverse)
library(stringr)
library(tidyr)
library(dplyr)
library(purrr)

# TODO
# - join the label data with the existing data from 0714 master
# - download and set up DSL library
# - figure out how to use the sentence-level variables
# - get the categorical variables encoded as integers, then wrapped as factors
# - figure out power at 200, 400, 500, 750, and 1000

# Joining sentences with their author IDs ----
# Reads the sentence categorization CSV and the master discussion CSV, then
# attaches the `AuthorPHID` column from the master data to every
# categorization row that shares the same `id`.

# Sentence categorization table (presumably one row per sentence -- confirm).
olmo_categorization_csv <- "~/dsl/inter_090725_sent_cats.csv"
sl_olmo_categorization_df <- read.csv(olmo_categorization_csv, header = TRUE)

# Master discussion table; only `id` and `AuthorPHID` are used below.
main_csv <- "~/p2/071425_master_discussion_data.csv"
main_df <- read.csv(main_csv, header = TRUE)

# A left join repeats a left-hand row once per matching right-hand row, so
# duplicate `id` values in `main_df` would silently inflate the row count of
# `joined_df`. Surface that case instead of letting it pass unnoticed; the
# join result itself is not altered.
if (anyDuplicated(main_df[["id"]]) > 0) {
  warning(
    "`main_df` contains duplicate `id` values; ",
    "`joined_df` will contain repeated categorization rows.",
    call. = FALSE
  )
}

# Keep every categorization row; rows with no matching `id` in `main_df`
# get `NA` for `AuthorPHID`.
joined_df <- left_join(
  sl_olmo_categorization_df,
  main_df %>% select(id, AuthorPHID),
  by = "id"
)