# mw-lifecycle-analysis/dsl/rq2_plot.R
# Listing metadata: 54 lines, 1.3 KiB, R

library(tidyverse)
#library(dsl)
library(dplyr)
# Location of the DSL frame CSV.
dsl_csv <- "~/dsl/121625_DSL_frame.csv"

# Read the frame and keep only rows whose isAuthorWMF value is not "BzImport"
# (dplyr's filter also drops rows where that comparison evaluates to NA).
dsl_df <- read.csv(dsl_csv, header = TRUE) |>
  dplyr::filter(isAuthorWMF != "BzImport")
# Reshape the three olmo_*_prop_adac columns into long form: one row per
# (original row, tag), with the column name in `tag` and its value in
# `proportion`.
dsl_df_long <- dsl_df |>
  pivot_longer(
    cols = all_of(c(
      "olmo_EP_prop_adac",
      "olmo_RK_prop_adac",
      "olmo_TSOL_prop_adac"
    )),
    names_to = "tag",
    values_to = "proportion"
  ) |>
  # Strip the shared prefix/suffix so only the short code (EP, RK, TSOL) is left.
  mutate(tag = gsub("olmo_|_prop_adac", "", tag)) |>
  # Replace each short code with its display label; any other code becomes NA.
  mutate(
    tag = case_when(
      tag == "EP" ~ "Existent Problem",
      tag == "RK" ~ "Record Keeping",
      tag == "TSOL" ~ "Solutions"
    )
  )
# Boxplots of `proportion` per tag, filled by isAuthorWMF, with one facet row
# per `source` value.
olmo_comparison <- ggplot(
  dsl_df_long,
  aes(x = tag, y = proportion, fill = isAuthorWMF)
) +
  geom_boxplot() +
  facet_grid(
    rows = vars(source),
    scales = "free_y", # each facet row gets its own y-axis range
    labeller = labeller(
      # Display names for the source codes c1 / c2 / c3.
      source = c(
        "c1" = "VisualEditor",
        "c2" = "HTTPS-login",
        "c3" = "HTTP-deprecation"
      )
    )
  ) +
  theme_minimal() +
  scale_fill_viridis_d() +
  labs(
    x = "Tag",
    y = "% of sentences tagged",
    color = "Is Author WMF?",
    fill = "Is Author WMF?"
  ) +
  # Must follow theme_minimal(), which would otherwise reset the legend position.
  theme(legend.position = "top")
# Show the plot when the script is evaluated at top level.
olmo_comparison

# Write the figure to disk as a 12 x 6 inch PNG at 800 dpi (high resolution).
ggsave(
  "121625_machine_label_comparison.png",
  olmo_comparison,
  width = 12,
  height = 6,
  units = "in",
  dpi = 800
)