# Review header for this patch. It is kept above the first `diff --git` line so
# that no hunk content is altered. The hunks below are shown on three physical
# lines; hunk headers (`@@ ... @@`) appear mid-line.
#
# dsl/final_bivariate.R
#   * Recodes `priority` as a factor whose level order is the reverse of
#     "Unbreak Now!", "High", "Medium", "Low", "Lowest", "Needs Triage".
#   * Adds `data_summary`: row counts per (week_index, priority, source) and
#     each count divided by the total of its (week_index, source) group.
#   * Adds a smoothed + point plot of `proportion` against `week_index` for the
#     priorities "Needs Triage", "Unbreak Now!" and "High", faceted by `source`.
#   * Adds a boxplot of `TTR/168` per `as.factor(week_index)` for the same three
#     priorities, restricted to `week_index >= -26`, with the y view limited to
#     0-112 by `coord_cartesian()` and a dashed vertical line at x position 27.
#   * Adds `dsl_df_long` (pivot of olmo_EP_prop / olmo_RK_prop / olmo_TSOL_prop
#     into `tag` / `proportion`, tags relabelled via `case_when`) and switches
#     the existing boxplot to it (x = tag, y = proportion); the facet no longer
#     uses `scales = "free_y"`.
#   * Adds `median_olmo_RK_prop_adac` to the `weekly_summary` aggregation.
#
# p2/quest/neurobiber_PCA_analysis.R
#   * `filter(ADAC=="1")` becomes `filter(ADAC == 1)`; fill changes from
#     `isAuthorWMF` to `as.factor(ADAC)`.
#   * Adds a commented-out title string.
#   * Facet rows change from `isAuthorWMF` to `ADAC`; viridis option changes
#     from "turbo" to "inferno".
#
# NOTE(review): `pivot_longer()` comes from tidyr and the plots need ggplot2;
#   only `library(dplyr)` and `library(ggdist)` are visible here - confirm the
#   others are attached elsewhere in the script.
# NOTE(review): `geom_vline(xintercept = 27)` sits on a discrete (factor) x
#   axis, so 27 is a level position. It lines up with week_index 0 only if
#   every week from -26 upward is present as a level - TODO confirm.
# NOTE(review): `proportion` is `count / sum(count)` (a 0-1 fraction) while the
#   axis labels say "% of ..." - confirm whether a percent scale is intended.
# NOTE(review): in the PCA script the aes maps x = PC4, y = PC3, while the
#   `labs()` context lines label x as "(PC3)" and y as "(PC4)". If both belong
#   to the same plot, the axis labels look swapped - verify.
# NOTE(review): after `filter(ADAC == 1)`, `fill = as.factor(ADAC)` can take
#   only one value, so the fill no longer distinguishes anything - presumably
#   unintended; confirm.
diff --git a/dsl/final_bivariate.R b/dsl/final_bivariate.R index 66f07a1..9ceda7f 100644 --- a/dsl/final_bivariate.R +++ b/dsl/final_bivariate.R @@ -4,20 +4,90 @@ library(dplyr) dsl_csv <-"~/dsl/111725_DSL_frame.csv" dsl_df <- read.csv(dsl_csv, header = TRUE) +dsl_df <- dsl_df |> + mutate(priority = factor(priority, + levels = rev(c("Unbreak Now!", "High", "Medium", "Low", "Lowest", "Needs Triage")))) + +data_summary <- dsl_df %>% + group_by(week_index, priority, source) %>% + summarise(count = n(), .groups = 'drop') |> + group_by(week_index, source) %>% + mutate(proportion = count / sum(count)) %>% + ungroup() + +library(ggdist) + +data_summary|> + filter(priority == "Needs Triage" | + priority == "Unbreak Now!" | + priority == "High") |> + ggplot(aes(x = week_index, y = proportion, color = priority, fill=priority, group = priority)) + + geom_smooth()+ + scale_color_viridis_d(option='turbo') + + scale_fill_viridis_d(option='turbo') + + facet_grid(source ~ ., scales = "free_y") + + geom_point() + + labs(title = "Triage priority proportions for new tasks by week created", + x = "Weeks from feature deployment", + y = "% of items tagged", + color = "Priority Tag") + + theme_minimal() + +dsl_df |> + filter(priority == "Needs Triage" | + priority == "Unbreak Now!" | + priority == "High") |> + filter(week_index >= -26) |> + ggplot( + aes( + x=as.factor(week_index), + y= TTR/168, + color=priority, + fill=priority + ) + ) + + facet_grid(source ~ .) 
+ + geom_boxplot(outlier.shape = NA) + + theme_minimal() + + coord_cartesian(ylim = c(0, 112)) + + geom_vline(xintercept =27, linetype = "dashed", color = "black", linewidth = 0.5) + + scale_color_viridis_d() + + labs(x = "Weeks from Release", y = "Time to Resolution (weeks)", title = "TTR by Task Creation Date and Triage Priority") + + +dsl_df_long <- dsl_df %>% + pivot_longer( + cols = c(olmo_EP_prop, olmo_RK_prop, olmo_TSOL_prop), + names_to = "tag", + values_to = "proportion" + ) %>% + mutate(tag = gsub("olmo_|_prop", "", tag), + tag = case_when( + tag == "EP" ~ "Existent Problem", + tag == "RK" ~ "Record Keeping", + tag =="TSOL" ~ "Solutions" + )) ggplot( - dsl_df, + dsl_df_long, aes( - x=as.factor(week_index), - y= (TTR/168), - fill=isAuthorWMF + x = tag, + y = proportion, + fill = isAuthorWMF, ) ) + - facet_grid(source ~ ., scales = "free_y") + + facet_grid(source ~ .) + geom_boxplot() + theme_minimal() + - scale_fill_viridis_d() + - labs(x = "Weeks from Release", y = "Time to Resolution (weeks)", title = "TTR by Task Creation Date") + scale_fill_viridis_d() + + labs( + x = "Tag", + y = "% of sentences tagged", + title = "Proportion of machine tags of sentence focus, by comment author affiliation", + color = "Is Author WMF", + fill = "Is Author WMF" + ) + weekly_summary <- dsl_df |> group_by(week_index, source, isAuthorWMF)|> @@ -27,6 +97,7 @@ weekly_summary <- dsl_df |> author_closer_sum = sum(author_closer == TRUE), median_olmo_EP_prop_adac = median(olmo_EP_prop_adac), median_olmo_TSOL_prop_adac = median(olmo_TSOL_prop_adac), + median_olmo_RK_prop_adac = median(olmo_RK_prop_adac), median_comments_before_resolution = median(n_comments_before) ) diff --git a/p2/quest/neurobiber_PCA_analysis.R b/p2/quest/neurobiber_PCA_analysis.R index 98824a3..3b3e5f5 100644 --- a/p2/quest/neurobiber_PCA_analysis.R +++ b/p2/quest/neurobiber_PCA_analysis.R @@ -42,12 +42,12 @@ main_df |> ) main_df |> - filter(ADAC=="1") |> + filter(ADAC == 1) |> ggplot( aes( x = PC4, y = PC3, 
- fill = isAuthorWMF + fill = as.factor(ADAC) ) ) + facet_grid(comment_type~source, @@ -69,6 +69,7 @@ main_df |> x = "Casual v. Formal Updates (PC3)", y = "Technical-matter v. Procedural Commentary (PC4)", ) +#"PCs for Pre-Resolution Comments Written by Task Author (by Author Affiliation, Case, and Comment Type)" main_df |> filter(comment_type=="task_subcomment") |> @@ -79,7 +80,7 @@ main_df |> fill = as.factor(ADAC) ) ) + - facet_grid(isAuthorWMF~source, + facet_grid(ADAC~source, labeller = as_labeller(c( "c1" = "VisualEditor (c1)", "c2" = "HTTPS-as-default (c2)", @@ -87,7 +88,7 @@ main_df |> ))) + geom_point(shape = 21, alpha=0.13, size=2) + scale_fill_viridis_d( - option = "turbo", + option = "inferno", name = "By Task Author Before Resolution", labels = c("No", "Yes"))+ theme_minimal() +