1
0

updating figures for tentative printout

This commit is contained in:
Matthew Gaughan 2025-12-04 14:06:22 -08:00
parent d513e245b5
commit a9ec0b19ef
2 changed files with 83 additions and 11 deletions

View File

@ -4,20 +4,90 @@ library(dplyr)
# Load the DSL task frame and turn `priority` into a factor whose levels run
# from "Needs Triage" up to "Unbreak Now!" (the reverse of the urgency-first
# listing), so plots and legends follow that order.
dsl_csv <- "~/dsl/111725_DSL_frame.csv"
dsl_df <- read.csv(dsl_csv, header = TRUE) |>
  mutate(
    priority = factor(
      priority,
      levels = c(
        "Needs Triage", "Lowest", "Low", "Medium", "High", "Unbreak Now!"
      )
    )
  )
# One row per (week_index, priority, source) with the number of tasks in that
# cell (`count`) and that cell's share of all tasks created in the same week
# and source (`proportion`).
data_summary <- dsl_df |>
  count(week_index, priority, source, name = "count") |>
  group_by(week_index, source) |>
  mutate(proportion = count / sum(count)) |>
  ungroup()
library(ggdist)
# Weekly proportion of newly created tasks carrying each of the three triage
# priorities of interest, drawn as points plus a smoothed trend, with one facet
# row per `source` (each row gets its own y range).
data_summary |>
  # `%in%` replaces the chained `==` tests; rows with NA priority are dropped
  # either way.
  filter(priority %in% c("Needs Triage", "Unbreak Now!", "High")) |>
  ggplot(
    aes(
      x = week_index,
      y = proportion,
      color = priority,
      fill = priority,
      group = priority
    )
  ) +
  geom_smooth() +
  geom_point() +
  scale_color_viridis_d(option = "turbo") +
  scale_fill_viridis_d(option = "turbo") +
  facet_grid(source ~ ., scales = "free_y") +
  labs(
    title = "Triage priority proportions for new tasks by week created",
    x = "Weeks from feature deployment",
    y = "% of items tagged",
    # color and fill map to the same variable; giving both the same title lets
    # ggplot2 merge them into one legend instead of drawing a second legend
    # titled "priority".
    color = "Priority Tag",
    fill = "Priority Tag"
  ) +
  theme_minimal()
# Boxplots of time to resolution per creation week, split by triage priority,
# one facet row per `source`. Only the three priorities of interest and weeks
# from -26 onward are shown.
dsl_df |>
  filter(
    priority %in% c("Needs Triage", "Unbreak Now!", "High"),
    week_index >= -26
  ) |>
  ggplot(
    aes(
      x = as.factor(week_index),
      # TTR is divided by 168 (hours per week) to match the "weeks" axis label;
      # presumably TTR is recorded in hours -- TODO confirm.
      y = TTR / 168,
      color = priority,
      fill = priority
    )
  ) +
  facet_grid(source ~ .) +
  # Outlier points are suppressed; the y range is then cropped (not filtered)
  # to 0-112 by coord_cartesian below.
  geom_boxplot(outlier.shape = NA) +
  theme_minimal() +
  coord_cartesian(ylim = c(0, 112)) +
  # On the discrete x axis, 27 is a position, not a week value. It lands on
  # week 0 only if every week from -26 upward is present -- TODO confirm.
  geom_vline(
    xintercept = 27,
    linetype = "dashed",
    color = "black",
    linewidth = 0.5
  ) +
  scale_color_viridis_d() +
  labs(
    x = "Weeks from Release",
    y = "Time to Resolution (weeks)",
    title = "TTR by Task Creation Date and Triage Priority"
  )
# Reshape the three olmo_*_prop columns into long form: one row per task and
# tag, with the tag code ("EP", "RK", "TSOL") replaced by a readable label.
dsl_df_long <- dsl_df |>
  pivot_longer(
    cols = c(olmo_EP_prop, olmo_RK_prop, olmo_TSOL_prop),
    names_to = "tag",
    values_to = "proportion"
  ) |>
  mutate(
    # Strip the "olmo_" prefix and "_prop" suffix, leaving the bare tag code.
    tag = gsub("olmo_|_prop", "", tag),
    # Look the code up in a named vector; any other code would become NA.
    tag = unname(
      c(
        EP = "Existent Problem",
        RK = "Record Keeping",
        TSOL = "Solutions"
      )[tag]
    )
  )
# Boxplots of the per-tag proportion values in `dsl_df_long`, one box per tag
# and author-affiliation group (`isAuthorWMF`), with one facet row per `source`.
ggplot(
  dsl_df_long,
  aes(
    x = tag,
    y = proportion,
    fill = isAuthorWMF
  )
) +
  facet_grid(source ~ .) +
  geom_boxplot() +
  theme_minimal() +
  scale_fill_viridis_d() +
  labs(
    x = "Tag",
    y = "% of sentences tagged",
    title = "Proportion of machine tags of sentence focus, by comment author affiliation",
    color = "Is Author WMF",
    fill = "Is Author WMF"
  )
weekly_summary <- dsl_df |>
group_by(week_index, source, isAuthorWMF)|>
@ -27,6 +97,7 @@ weekly_summary <- dsl_df |>
author_closer_sum = sum(author_closer == TRUE),
median_olmo_EP_prop_adac = median(olmo_EP_prop_adac),
median_olmo_TSOL_prop_adac = median(olmo_TSOL_prop_adac),
median_olmo_RK_prop_adac = median(olmo_RK_prop_adac),
median_comments_before_resolution = median(n_comments_before)
)

View File

@ -42,12 +42,12 @@ main_df |>
)
main_df |>
filter(ADAC=="1") |>
filter(ADAC == 1) |>
ggplot(
aes(
x = PC4,
y = PC3,
fill = isAuthorWMF
fill = as.factor(ADAC)
)
) +
facet_grid(comment_type~source,
@ -69,6 +69,7 @@ main_df |>
x = "Casual v. Formal Updates (PC3)",
y = "Technical-matter v. Procedural Commentary (PC4)",
)
#"PCs for Pre-Resolution Comments Written by Task Author (by Author Affiliation, Case, and Comment Type)"
main_df |>
filter(comment_type=="task_subcomment") |>
@ -79,7 +80,7 @@ main_df |>
fill = as.factor(ADAC)
)
) +
facet_grid(isAuthorWMF~source,
facet_grid(ADAC~source,
labeller = as_labeller(c(
"c1" = "VisualEditor (c1)",
"c2" = "HTTPS-as-default (c2)",
@ -87,7 +88,7 @@ main_df |>
))) +
geom_point(shape = 21, alpha=0.13, size=2) +
scale_fill_viridis_d(
option = "turbo",
option = "inferno",
name = "By Task Author Before Resolution",
labels = c("No", "Yes"))+
theme_minimal() +