1
0
mw-lifecycle-analysis/p1/phab_analysis/m2-viz-coreference.R
2025-07-11 15:14:24 -05:00

24 lines
1019 B
R

library(tidyverse)
# stringr supplies str_count() and regex() used in the counting step below.
library(stringr)

# Input locations ----
# One CSV per case; each path is kept in its own variable.
c1_count <- "/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/0312_resolved_ve_phab_comments.csv"
c2_count <- "/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case2/051825_coref_resolved_dep_trees.csv"
c3_count <- "/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case3/050825_coref-rel-final.csv"

# Load and label ----
# Read each file and tag every row with the case it came from, so the rows
# remain distinguishable after the three tables are stacked.
c1_input_df <- read.csv(c1_count, header = TRUE) |>
  mutate(source = "c1")
c2_input_df <- read.csv(c2_count, header = TRUE) |>
  mutate(source = "c2")
c3_input_df <- read.csv(c3_count, header = TRUE) |>
  mutate(source = "c3")

# Combine and count ----
# Stack the three labelled tables, then add, per row, the number of
# case-insensitive matches of "user" in the `text` column and in the
# `resolved_text` column. The pattern is a plain substring match, so longer
# words that contain "user" are counted as well.
combined_df <- bind_rows(c1_input_df, c2_input_df, c3_input_df) |>
  mutate(
    user_in_text = str_count(text, regex("user", ignore_case = TRUE)),
    user_in_resolved_text = str_count(
      resolved_text,
      regex("user", ignore_case = TRUE)
    )
  )