Reconstructed from a whitespace-collapsed copy of this patch. Indentation and the
positions of blank context lines are best-effort; verify them against the working
tree before applying. Added lines were edited (axis labels, facet labels, comments),
so the post-image hashes on the "index" lines no longer describe the result.

diff --git a/dsl/120225_logit_dsl.RDS b/dsl/120225_logit_dsl.RDS
new file mode 100644
index 0000000..ffdcccf
Binary files /dev/null and b/dsl/120225_logit_dsl.RDS differ
diff --git a/dsl/dsl.R b/dsl/dsl.R
index 5bb800c..e47b7e0 100644
--- a/dsl/dsl.R
+++ b/dsl/dsl.R
@@ -1,7 +1,7 @@
 library(tidyverse)
 library(dsl)
 
-dsl_csv <-"111725_DSL_frame.csv"
+dsl_csv <-"~/dsl/111725_DSL_frame.csv"
 dsl_df <- read.csv(dsl_csv, header = TRUE)
 
 dsl_df <- dsl_df |>
@@ -81,7 +81,7 @@ dev_model <- dsl(
   data=dsl_df
 )
 summary(dev_model)
-
+saveRDS(dev_model, "120225_logit_dsl.RDS")
 
 library(broom)
 library(dplyr)
@@ -101,6 +101,45 @@ tidy.dsl <- function(x, conf.int = FALSE, conf.level = 0.95, exponentiate = FALS
   return(out)
 }
 coef_df <- tidy.dsl(dev_model)
+# Relabel model terms for display, then fix their order on the y axis.
+# rev() is used because ggplot2 draws the first factor level at the bottom.
+coef_df <- coef_df |>
+  mutate(
+    term = recode(term,
+      "week_index" = "Weeks from deployment",
+      "(Intercept)" = "Intercept",
+      "n_comments_before" = "# of comments prior to resolution",
+      "median_PC4_adac" = "Median Author PC4 Pre-resolution",
+      "median_PC3_adac" = "Median Author PC3 Pre-resolution",
+      "median_gerrit_reviewers" = "Median # of Code Reviewers (Gerrit)",
+      "median_gerrit_loc_delta" = "Median LoC Changed (Gerrit)",
+      "human_TSOL_prop_adac" = "% of sentences discussing 'Solutions'",
+      "human_RK_prop_adac" = "% of sentences discussing 'Record Keeping'",
+      "human_EP_prop_adac" = "% of sentences discussing 'Existent Problems'",
+      "as.factor(source)c3" = "HTTP-deprecation (factor)",
+      "as.factor(source)c2" = "HTTPS-as-default (factor)",
+      "as.factor(isAuthorWMF)TRUE" = "WMF-affiliate Author (factor)",
+      "as.factor(isAuthorWMF)TRUE:as.factor(source)c2" = "WMF-affiliate Author:HTTPS-as-default",
+      "as.factor(isAuthorWMF)TRUE:as.factor(source)c3" = "WMF-affiliate Author:HTTP-deprecation"
+    ),
+    term = factor(term, levels = rev(c(
+      "Intercept",
+      "% of sentences discussing 'Existent Problems'",
+      "% of sentences discussing 'Solutions'",
+      "% of sentences discussing 'Record Keeping'",
+      "Median Author PC4 Pre-resolution",
+      "Median Author PC3 Pre-resolution",
+      "# of comments prior to resolution",
+      "Median # of Code Reviewers (Gerrit)",
+      "Median LoC Changed (Gerrit)",
+      "Weeks from deployment",
+      "HTTPS-as-default (factor)",
+      "HTTP-deprecation (factor)",
+      "WMF-affiliate Author (factor)",
+      "WMF-affiliate Author:HTTPS-as-default",
+      "WMF-affiliate Author:HTTP-deprecation"
+    )))
+  )
 ggplot(coef_df, aes(x = estimate, y = term)) +
   geom_point(size = 1) +
   geom_errorbar(aes(xmin = estimate - 1.96*std.error, xmax = estimate + 1.96 *std.error), height = 0.2) +
diff --git a/dsl/final_bivariate.R b/dsl/final_bivariate.R
index 3df73e4..66f07a1 100644
--- a/dsl/final_bivariate.R
+++ b/dsl/final_bivariate.R
@@ -93,7 +93,8 @@ ggplot(
   geom_point() +
   geom_smooth() +
   scale_color_viridis_d() +
-  theme_minimal()
+  theme_minimal() +
+  labs(x = "Weeks from Release", y = "% of sentences machine-tagged as 'Existent Problems'", title = "Proportion of 'Existent Problems' tags over time")
 
 dsl_df <- dsl_df |>
   mutate(priority = factor(priority,
diff --git a/p2/quest/neurobiber_PCA_analysis.R b/p2/quest/neurobiber_PCA_analysis.R
index 1689783..98824a3 100644
--- a/p2/quest/neurobiber_PCA_analysis.R
+++ b/p2/quest/neurobiber_PCA_analysis.R
@@ -12,6 +12,104 @@ library(dplyr)
 main_csv <- "~/analysis_data/110925_unified.csv"
 main_df <- read.csv(main_csv , header = TRUE)
 
+# Scatter of the two principal components for every comment, filled by
+# comment type with one panel per case. xlim()/ylim() drop points outside
+# [-50, 50] (ggplot2 warns about the removed rows).
+main_df |>
+  ggplot(
+    aes(
+      x = PC4,
+      y = PC3,
+      fill = comment_type
+    )
+  ) +
+  facet_grid(~source, scales="fixed",
+             labeller = as_labeller(c(
+               "c1" = "VisualEditor (c1)",
+               "c2" = "HTTPS-as-default (c2)",
+               "c3" = "HTTP-deprecation (c3)"
+             ))) +
+  geom_point(shape = 21, alpha=0.3, size=2) +
+  xlim(-50, 50) +
+  ylim(-50, 50) +
+  scale_fill_viridis_d(
+    option = "magma",
+    name = "Comment type",
+    labels = c("Task Description", "Reply"))+
+  theme_minimal() +
+  theme(legend.position = "top") +
+  labs(
+    title = "PCs for Task Comments by comment type and case",
+    x = "Technical-matter v. Procedural Commentary (PC4)",
+    y = "Casual v. Formal Updates (PC3)"
+  )
+
+# Rows with ADAC == "1" only, filled by author affiliation and faceted by
+# comment type (rows) and case (columns).
+main_df |>
+  filter(ADAC=="1") |>
+  ggplot(
+    aes(
+      x = PC4,
+      y = PC3,
+      fill = isAuthorWMF
+    )
+  ) +
+  facet_grid(comment_type~source,
+             labeller = as_labeller(c(
+               "c1" = "VisualEditor (c1)",
+               "c2" = "HTTPS-as-default (c2)",
+               "c3" = "HTTP-deprecation (c3)",
+               "task_description" = "Task Description",
+               "task_subcomment" = "Follow-up Reply"
+             ))) +
+  geom_point(shape = 21, alpha=0.3, size=2) +
+  scale_fill_viridis_d(
+    name = "Comment Author Affiliation",
+    labels = c("Nonaffiliated", "WMF-affiliated"))+
+  theme_minimal() +
+  theme(legend.position = "top") +
+  labs(
+    title = "PCs for Pre-Resolution Comments Written by Task Author (by Author Affiliation, Case, and Comment Type)",
+    x = "Technical-matter v. Procedural Commentary (PC4)",
+    y = "Casual v. Formal Updates (PC3)"
+  )
+
+# Replies only, filled by ADAC and faceted by author affiliation (rows) and
+# case (columns).
+main_df |>
+  filter(comment_type=="task_subcomment") |>
+  ggplot(
+    aes(
+      x = PC4,
+      y = PC3,
+      fill = as.factor(ADAC)
+    )
+  ) +
+  facet_grid(isAuthorWMF~source,
+             labeller = as_labeller(c(
+               "c1" = "VisualEditor (c1)",
+               "c2" = "HTTPS-as-default (c2)",
+               "c3" = "HTTP-deprecation (c3)",
+               # A lookup-vector labeller yields NA for values it does not
+               # list, so the row variable needs entries as well. Assumes
+               # isAuthorWMF holds TRUE/FALSE -- TODO confirm.
+               "TRUE" = "WMF-affiliated",
+               "FALSE" = "Nonaffiliated"
+             ))) +
+  geom_point(shape = 21, alpha=0.13, size=2) +
+  scale_fill_viridis_d(
+    option = "turbo",
+    name = "By Task Author Before Resolution",
+    labels = c("No", "Yes"))+
+  theme_minimal() +
+  theme(legend.position = "top") +
+  labs(
+    title = "PCs for Replies (by Author Affiliation, Case, and Comment Type)",
+    x = "Technical-matter v. Procedural Commentary (PC4)",
+    y = "Casual v. Formal Updates (PC3)"
+  )
+
 main_df <- main_df |>
   mutate(
     comment_wordcount = as.integer(stringr::str_count(tidyr::replace_na(as.character(comment_text), ""), "\\S+"))