# R script (54 lines, 1.3 KiB)
# Packages ----
library(tidyverse)
#library(dsl)
library(dplyr)

# Load data ----
# Path to the input CSV.
dsl_csv <- "~/dsl/121625_DSL_frame.csv"

# Read the CSV and keep only rows whose isAuthorWMF value is not "BzImport".
# dplyr::filter() also drops rows where the comparison evaluates to NA.
dsl_df <- read.csv(dsl_csv, header = TRUE) |>
  filter(isAuthorWMF != "BzImport")

# Reshape to long form ----
# Stack the three olmo_*_prop_adac columns into name/value pairs: the column
# name goes to `tag`, the cell value to `proportion`.
dsl_df_long <- dsl_df |>
  pivot_longer(
    cols = c(olmo_EP_prop_adac, olmo_RK_prop_adac, olmo_TSOL_prop_adac),
    names_to = "tag",
    values_to = "proportion"
  ) |>
  mutate(
    # Strip the "olmo_" prefix and "_prop_adac" suffix, leaving EP / RK / TSOL.
    tag = gsub("olmo_|_prop_adac", "", tag),
    # Map each short code to its display label; any other code becomes NA.
    tag = unname(c(
      EP = "Existent Problem",
      RK = "Record Keeping",
      TSOL = "Solutions"
    )[tag])
  )

# Plot ----
# Boxplots of `proportion` per tag, filled by `isAuthorWMF`, with one facet row
# per `source` value. Each facet row gets its own y scale ("free_y").
olmo_comparison <- ggplot(
  dsl_df_long,
  aes(x = tag, y = proportion, fill = isAuthorWMF)
) +
  facet_grid(
    source ~ .,
    scales = "free_y",
    # Display names for the `source` codes shown in the facet strips.
    labeller = labeller(source = c(
      "c1" = "VisualEditor",
      "c2" = "HTTPS-login",
      "c3" = "HTTP-deprecation"
    ))
  ) +
  geom_boxplot() +
  theme_minimal() +
  scale_fill_viridis_d() +
  # Only `fill` is mapped, so only the fill legend needs a title.
  # NOTE(review): the y label says "%" but the plotted column is named
  # `proportion` -- confirm the values are on a 0-100 scale.
  labs(
    x = "Tag",
    y = "% of sentences tagged",
    fill = "Is Author WMF?"
  ) +
  # Must come after theme_minimal(), which would otherwise reset the legend
  # position.
  theme(legend.position = "top")

# Auto-print the plot when the script is run at top level.
olmo_comparison

# Save ----
# Write the figure as a PNG, relative to the working directory.
# 12 x 6 inches at 800 dpi gives a 9600 x 4800 pixel image.
ggsave(
  "121625_machine_label_comparison.png",
  olmo_comparison,
  width = 12,
  height = 6,
  units = "in",
  dpi = 800
)