# Compare machine-assigned (OLMo) sentence tags between author groups.
#
# Reads the DSL frame CSV, drops rows whose `isAuthorWMF` value is
# "BzImport", reshapes the three `olmo_*_prop_adac` columns into long
# format, and draws one boxplot per tag, filled by `isAuthorWMF` and
# faceted by `source`. The figure is displayed and written to a PNG.

library(tidyverse)
#library(dsl)
library(dplyr)

# Inputs / outputs ----
dsl_csv <- "~/dsl/121625_DSL_frame.csv"
plot_file <- "121625_machine_label_comparison.png"

# Load and filter ----
dsl_df <- read.csv(dsl_csv, header = TRUE)

# `!=` yields NA for missing `isAuthorWMF`, and filter() drops those rows
# as well as the "BzImport" rows.
dsl_df <- dsl_df |>
  filter(isAuthorWMF != "BzImport")

# Reshape to one row per (observation, tag) ----
dsl_df_long <- dsl_df |>
  pivot_longer(
    cols = c(olmo_EP_prop_adac, olmo_RK_prop_adac, olmo_TSOL_prop_adac),
    names_to = "tag",
    values_to = "proportion"
  ) |>
  mutate(
    # Strip the shared prefix/suffix so only the short code remains
    # (EP, RK, TSOL).
    tag = gsub("olmo_|_prop_adac", "", tag),
    # Expand the short code into a display label; any code not listed
    # here becomes NA because no default branch is given.
    tag = case_when(
      tag == "EP" ~ "Existent Problem",
      tag == "RK" ~ "Record Keeping",
      tag == "TSOL" ~ "Solutions"
    )
  )

# Plot ----
# Display names for the facet strips, keyed by the values of `source`.
source_labels <- c(
  "c1" = "VisualEditor",
  "c2" = "HTTPS-login",
  "c3" = "HTTP-deprecation"
)

# NOTE(review): the y-axis label says "%" but the plotted column is named
# `proportion`; confirm whether values are on a 0-1 or 0-100 scale.
olmo_comparison <- ggplot(
  dsl_df_long,
  aes(x = tag, y = proportion, fill = isAuthorWMF)
) +
  facet_grid(
    source ~ .,
    scales = "free_y",
    labeller = labeller(source = source_labels)
  ) +
  geom_boxplot() +
  theme_minimal() +
  scale_fill_viridis_d() +
  labs(
    x = "Tag",
    y = "% of sentences tagged",
    fill = "Is Author WMF?"
  ) +
  theme(legend.position = "top")

olmo_comparison

# Save ----
ggsave(
  filename = plot_file,
  plot = olmo_comparison,
  width = 12, # inches
  height = 6, # inches
  dpi = 800 # high resolution
)