24 lines
765 B
R
24 lines
765 B
R
library(tidyverse)
|
|
library(stringr)
|
|
library(tidyr)
|
|
library(dplyr)
|
|
library(purrr)
|
|
# TODO
|
|
# join the label data with the existing data from 0714 master
|
|
# download and set up DSL library
|
|
# figure out how to use the sentence-level variables
|
|
# get the categorical variables encoded as integers, then wrapped as factors
|
|
# figure out power at 200, 400, 500, 750, and 1000
|
|
# joining sentences with their author IDs (AuthorPHID) from the master data
|
|
# Input locations: sentence-level category CSV and the master discussion CSV.
olmo_categorization_csv <- "~/dsl/inter_090725_sent_cats.csv"
main_csv <- "~/p2/071425_master_discussion_data.csv"

# Load both files; the first row of each is read as the column names.
sl_olmo_categorization_df <- read.csv(olmo_categorization_csv, header = TRUE)
main_df <- read.csv(main_csv, header = TRUE)

# Attach AuthorPHID from the master data to the categorization rows, matching
# on `id`. Being a left join, every categorization row is kept; rows whose
# `id` has no match in main_df get NA for AuthorPHID.
joined_df <- sl_olmo_categorization_df %>%
  left_join(select(main_df, id, AuthorPHID), by = "id")