1
0

updating some plots for results section, also saving model to file

This commit is contained in:
Matthew Gaughan 2025-12-02 14:20:38 -08:00
parent 90594d1ce3
commit d513e245b5
4 changed files with 127 additions and 3 deletions

BIN
dsl/120225_logit_dsl.RDS Normal file

Binary file not shown.

View File

@ -1,7 +1,7 @@
library(tidyverse) library(tidyverse)
library(dsl) library(dsl)
dsl_csv <-"111725_DSL_frame.csv" dsl_csv <-"~/dsl/111725_DSL_frame.csv"
dsl_df <- read.csv(dsl_csv, header = TRUE) dsl_df <- read.csv(dsl_csv, header = TRUE)
dsl_df <- dsl_df |> dsl_df <- dsl_df |>
@ -81,7 +81,7 @@ dev_model <- dsl(
data=dsl_df data=dsl_df
) )
summary(dev_model) summary(dev_model)
saveRDS(dev_model, "120225_logit_dsl.RDS")
library(broom) library(broom)
library(dplyr) library(dplyr)
@ -101,6 +101,43 @@ tidy.dsl <- function(x, conf.int = FALSE, conf.level = 0.95, exponentiate = FALS
return(out) return(out)
} }
coef_df <- tidy.dsl(dev_model) coef_df <- tidy.dsl(dev_model)
# Prepare the coefficient table for plotting in two steps:
#   1. swap raw model term names for human-readable labels;
#   2. turn the labels into a factor whose level order fixes the plotting
#      order on the y axis.
coef_df <- coef_df |>
  mutate(
    term = recode(
      term,
      "(Intercept)"                                    = "Intercept",
      "human_EP_prop_adac"                             = "% of sentences discussing 'Existent Problems'",
      "human_TSOL_prop_adac"                           = "% of sentences discussing 'Solutions'",
      "human_RK_prop_adac"                             = "% of sentences discussing 'Record Keeping'",
      "median_PC4_adac"                                = "Median Author PC4 Pre-resolution",
      "median_PC3_adac"                                = "Median Author PC3 Pre-resolution",
      "n_comments_before"                              = "# of comments prior to resolution",
      "median_gerrit_reviewers"                        = "Median # of Code Reviewers (Gerrit)",
      "median_gerrit_loc_delta"                        = "Median LoC Changed (Gerrit)",
      "week_index"                                     = "Weeks from deployment",
      "as.factor(source)c2"                            = "HTTPS-as-default (factor)",
      "as.factor(source)c3"                            = "HTTP-deprecation (factor)",
      "as.factor(isAuthorWMF)TRUE"                     = "WMF-affiliate Author (factor)",
      "as.factor(isAuthorWMF)TRUE:as.factor(source)c2" = "WMF-affiliate Author:HTTPS-as-default",
      "as.factor(isAuthorWMF)TRUE:as.factor(source)c3" = "WMF-affiliate Author:HTTP-deprecation"
    )
  ) |>
  mutate(
    # The labels below are listed in the intended top-to-bottom reading
    # order; rev() flips them because a discrete y axis draws the first
    # factor level at the bottom.
    term = factor(
      term,
      levels = rev(c(
        "Intercept",
        "% of sentences discussing 'Existent Problems'",
        "% of sentences discussing 'Solutions'",
        "% of sentences discussing 'Record Keeping'",
        "Median Author PC4 Pre-resolution",
        "Median Author PC3 Pre-resolution",
        "# of comments prior to resolution",
        "Median # of Code Reviewers (Gerrit)",
        "Median LoC Changed (Gerrit)",
        "Weeks from deployment",
        "HTTPS-as-default (factor)",
        "HTTP-deprecation (factor)",
        "WMF-affiliate Author (factor)",
        "WMF-affiliate Author:HTTPS-as-default",
        "WMF-affiliate Author:HTTP-deprecation"
      ))
    )
  )
ggplot(coef_df, aes(x = estimate, y = term)) + ggplot(coef_df, aes(x = estimate, y = term)) +
geom_point(size = 1) + geom_point(size = 1) +
geom_errorbar(aes(xmin = estimate - 1.96*std.error, xmax = estimate + 1.96 *std.error), height = 0.2) + geom_errorbar(aes(xmin = estimate - 1.96*std.error, xmax = estimate + 1.96 *std.error), height = 0.2) +

View File

@ -93,7 +93,8 @@ ggplot(
geom_point() + geom_point() +
geom_smooth() + geom_smooth() +
scale_color_viridis_d() + scale_color_viridis_d() +
theme_minimal() theme_minimal() +
labs(x = "Weeks from Release", y = "% of sentences machine-tagged as'Existent Problems'", title = "Proportion of 'Existent Problems' tags over time")
dsl_df <- dsl_df |> dsl_df <- dsl_df |>
mutate(priority = factor(priority, mutate(priority = factor(priority,

View File

@ -12,6 +12,92 @@ library(dplyr)
main_csv <- "~/analysis_data/110925_unified.csv" main_csv <- "~/analysis_data/110925_unified.csv"
main_df <- read.csv(main_csv , header = TRUE) main_df <- read.csv(main_csv , header = TRUE)
# Scatter plot of the PC4 and PC3 scores of every row in `main_df`,
# one panel per case (`source`), with points filled by `comment_type`.
main_df |>
  ggplot(
    aes(
      x = PC4,
      y = PC3,
      fill = comment_type
    )
  ) +
  facet_grid(
    ~source,
    scales = "fixed",
    labeller = as_labeller(c(
      "c1" = "VisualEditor (c1)",
      "c2" = "HTTPS-as-default (c2)",
      "c3" = "HTTP-deprecation (c3)"
    ))
  ) +
  geom_point(shape = 21, alpha = 0.3, size = 2) +
  # Fixed, symmetric axis limits; observations outside them are not drawn.
  xlim(-50, 50) +
  ylim(-50, 50) +
  scale_fill_viridis_d(
    option = "magma",
    name = "Comment type",
    # Applied positionally to the values of `comment_type`.
    # NOTE(review): presumably task_description, then task_subcomment --
    # confirm against the data.
    labels = c("Task Description", "Reply")
  ) +
  theme_minimal() +
  theme(legend.position = "top") +
  labs(
    title = "PCs for Task Comments by comment type and case",
    # The axis titles must follow the aes() mapping above (x = PC4,
    # y = PC3); they were previously attached to the opposite axes.
    x = "Technical-matter v. Procedural Commentary (PC4)",
    y = "Casual v. Formal Updates (PC3)"
  )
# Scatter plot of PC4 against PC3 for the rows of `main_df` where
# ADAC == "1", laid out as a grid of comment type (rows) by case
# (columns), with points filled by `isAuthorWMF`.
main_df |>
  filter(ADAC == "1") |>
  ggplot(
    aes(
      x = PC4,
      y = PC3,
      fill = isAuthorWMF
    )
  ) +
  facet_grid(
    comment_type ~ source,
    # A single lookup table is applied to both facet variables, so it has
    # to carry the keys of `source` and of `comment_type`.
    labeller = as_labeller(c(
      "c1" = "VisualEditor (c1)",
      "c2" = "HTTPS-as-default (c2)",
      "c3" = "HTTP-deprecation (c3)",
      "task_description" = "Task Description",
      "task_subcomment" = "Follow-up Reply"
    ))
  ) +
  geom_point(shape = 21, alpha = 0.3, size = 2) +
  scale_fill_viridis_d(
    name = "Comment Author Affiliation",
    # Applied positionally to the values of `isAuthorWMF`.
    # NOTE(review): presumably FALSE, then TRUE -- confirm against the data.
    labels = c("Nonaffiliated", "WMF-affiliated")
  ) +
  theme_minimal() +
  theme(legend.position = "top") +
  labs(
    title = "PCs for Pre-Resolution Comments Written by Task Author (by Author Affiliation, Case, and Comment Type)",
    # The axis titles must follow the aes() mapping above (x = PC4,
    # y = PC3); they were previously attached to the opposite axes.
    x = "Technical-matter v. Procedural Commentary (PC4)",
    y = "Casual v. Formal Updates (PC3)"
  )
# Scatter plot of PC4 against PC3 for reply comments only
# (comment_type == "task_subcomment"), laid out as a grid of author
# affiliation (rows) by case (columns), with points filled by `ADAC`.
main_df |>
  filter(comment_type == "task_subcomment") |>
  ggplot(
    aes(
      x = PC4,
      y = PC3,
      fill = as.factor(ADAC)
    )
  ) +
  facet_grid(
    isAuthorWMF ~ source,
    # Label each facet variable separately. A single as_labeller() lookup
    # table would also be applied to `isAuthorWMF`, whose values are not
    # keys of the table, so the row strips would be rendered as NA.
    labeller = labeller(
      source = c(
        "c1" = "VisualEditor (c1)",
        "c2" = "HTTPS-as-default (c2)",
        "c3" = "HTTP-deprecation (c3)"
      ),
      # Shows "isAuthorWMF: <value>" whatever the column's encoding is.
      isAuthorWMF = label_both
    )
  ) +
  geom_point(shape = 21, alpha = 0.13, size = 2) +
  scale_fill_viridis_d(
    option = "turbo",
    name = "By Task Author Before Resolution",
    # Applied positionally to the levels of as.factor(ADAC).
    # NOTE(review): presumably 0, then 1 -- confirm against the data.
    labels = c("No", "Yes")
  ) +
  theme_minimal() +
  theme(legend.position = "top") +
  labs(
    title = "PCs for Replies (by Author Affiliation, Case, and Comment Type)",
    # The axis titles must follow the aes() mapping above (x = PC4,
    # y = PC3); they were previously attached to the opposite axes.
    x = "Technical-matter v. Procedural Commentary (PC4)",
    y = "Casual v. Formal Updates (PC3)"
  )
main_df <- main_df |> main_df <- main_df |>
mutate( mutate(
comment_wordcount = as.integer(stringr::str_count(tidyr::replace_na(as.character(comment_text), ""), "\\S+")) comment_wordcount = as.integer(stringr::str_count(tidyr::replace_na(as.character(comment_text), ""), "\\S+"))