updating some plots for results section, also saving model to file
This commit is contained in:
parent
90594d1ce3
commit
d513e245b5
BIN
dsl/120225_logit_dsl.RDS
Normal file
BIN
dsl/120225_logit_dsl.RDS
Normal file
Binary file not shown.
41
dsl/dsl.R
41
dsl/dsl.R
@ -1,7 +1,7 @@
|
|||||||
library(tidyverse)
|
library(tidyverse)
|
||||||
library(dsl)
|
library(dsl)
|
||||||
|
|
||||||
dsl_csv <-"111725_DSL_frame.csv"
|
dsl_csv <-"~/dsl/111725_DSL_frame.csv"
|
||||||
dsl_df <- read.csv(dsl_csv, header = TRUE)
|
dsl_df <- read.csv(dsl_csv, header = TRUE)
|
||||||
|
|
||||||
dsl_df <- dsl_df |>
|
dsl_df <- dsl_df |>
|
||||||
@ -81,7 +81,7 @@ dev_model <- dsl(
|
|||||||
data=dsl_df
|
data=dsl_df
|
||||||
)
|
)
|
||||||
summary(dev_model)
|
summary(dev_model)
|
||||||
|
saveRDS(dev_model, "120225_logit_dsl.RDS")
|
||||||
|
|
||||||
library(broom)
|
library(broom)
|
||||||
library(dplyr)
|
library(dplyr)
|
||||||
@ -101,6 +101,43 @@ tidy.dsl <- function(x, conf.int = FALSE, conf.level = 0.95, exponentiate = FALS
|
|||||||
return(out)
|
return(out)
|
||||||
}
|
}
|
||||||
coef_df <- tidy.dsl(dev_model)
|
coef_df <- tidy.dsl(dev_model)
|
||||||
|
# Give the model terms human-readable names, then fix their display order.
coef_df <- coef_df |>
  # Step 1: map raw coefficient names to the labels used in the figure.
  # The lookup is a named vector (old name = new label) spliced into recode().
  mutate(
    term = recode(
      term,
      !!!c(
        "week_index" = "Weeks from deployment",
        "(Intercept)" = "Intercept",
        "n_comments_before" = "# of comments prior to resolution",
        "median_PC4_adac" = "Median Author PC4 Pre-resolution",
        "median_PC3_adac" = "Median Author PC3 Pre-resolution",
        "median_gerrit_reviewers" = "Median # of Code Reviewers (Gerrit)",
        "median_gerrit_loc_delta" = "Median LoC Changed (Gerrit)",
        "human_TSOL_prop_adac" = "% of sentences discussing 'Solutions'",
        "human_RK_prop_adac" = "% of sentences discussing 'Record Keeping'",
        "human_EP_prop_adac" = "% of sentences discussing 'Existent Problems'",
        "as.factor(source)c3" = "HTTP-deprecation (factor)",
        "as.factor(source)c2" = "HTTPS-as-default (factor)",
        "as.factor(isAuthorWMF)TRUE" = "WMF-affiliate Author (factor)",
        "as.factor(isAuthorWMF)TRUE:as.factor(source)c2" = "WMF-affiliate Author:HTTPS-as-default",
        "as.factor(isAuthorWMF)TRUE:as.factor(source)c3" = "WMF-affiliate Author:HTTP-deprecation"
      )
    )
  ) |>
  # Step 2: turn the labels into a factor. The vector below is written in
  # top-to-bottom reading order and reversed, because the first factor level
  # is drawn at the bottom of a discrete y axis. Any label not listed here
  # becomes NA.
  mutate(
    term = factor(
      term,
      levels = rev(c(
        "Intercept",
        "% of sentences discussing 'Existent Problems'",
        "% of sentences discussing 'Solutions'",
        "% of sentences discussing 'Record Keeping'",
        "Median Author PC4 Pre-resolution",
        "Median Author PC3 Pre-resolution",
        "# of comments prior to resolution",
        "Median # of Code Reviewers (Gerrit)",
        "Median LoC Changed (Gerrit)",
        "Weeks from deployment",
        "HTTPS-as-default (factor)",
        "HTTP-deprecation (factor)",
        "WMF-affiliate Author (factor)",
        "WMF-affiliate Author:HTTPS-as-default",
        "WMF-affiliate Author:HTTP-deprecation"
      ))
    )
  )
|
||||||
ggplot(coef_df, aes(x = estimate, y = term)) +
|
ggplot(coef_df, aes(x = estimate, y = term)) +
|
||||||
geom_point(size = 1) +
|
geom_point(size = 1) +
|
||||||
geom_errorbar(aes(xmin = estimate - 1.96*std.error, xmax = estimate + 1.96 *std.error), height = 0.2) +
|
geom_errorbar(aes(xmin = estimate - 1.96*std.error, xmax = estimate + 1.96 *std.error), height = 0.2) +
|
||||||
|
|||||||
@ -93,7 +93,8 @@ ggplot(
|
|||||||
geom_point() +
|
geom_point() +
|
||||||
geom_smooth() +
|
geom_smooth() +
|
||||||
scale_color_viridis_d() +
|
scale_color_viridis_d() +
|
||||||
theme_minimal()
|
theme_minimal() +
|
||||||
|
labs(x = "Weeks from Release", y = "% of sentences machine-tagged as'Existent Problems'", title = "Proportion of 'Existent Problems' tags over time")
|
||||||
|
|
||||||
dsl_df <- dsl_df |>
|
dsl_df <- dsl_df |>
|
||||||
mutate(priority = factor(priority,
|
mutate(priority = factor(priority,
|
||||||
|
|||||||
@ -12,6 +12,92 @@ library(dplyr)
|
|||||||
main_csv <- "~/analysis_data/110925_unified.csv"
|
main_csv <- "~/analysis_data/110925_unified.csv"
|
||||||
main_df <- read.csv(main_csv , header = TRUE)
|
main_df <- read.csv(main_csv , header = TRUE)
|
||||||
|
|
||||||
|
# Scatter of PC4 (x) against PC3 (y) for every row of main_df, one panel per
# case (`source`), with points filled by comment type.
main_df |>
  ggplot(
    aes(
      x = PC4,
      y = PC3,
      fill = comment_type
    )
  ) +
  facet_grid(
    ~source,
    scales = "fixed",
    labeller = as_labeller(c(
      "c1" = "VisualEditor (c1)",
      "c2" = "HTTPS-as-default (c2)",
      "c3" = "HTTP-deprecation (c3)"
    ))
  ) +
  geom_point(shape = 21, alpha = 0.3, size = 2) +
  # xlim()/ylim() drop (not clip) observations outside [-50, 50].
  xlim(-50, 50) +
  ylim(-50, 50) +
  scale_fill_viridis_d(
    option = "magma",
    name = "Comment type",
    # Labels are applied positionally to the sorted values of comment_type.
    # NOTE(review): presumably task_description, then task_subcomment - confirm.
    labels = c("Task Description", "Reply")
  ) +
  theme_minimal() +
  theme(legend.position = "top") +
  labs(
    title = "PCs for Task Comments by comment type and case",
    # Axis titles follow the aes() mapping: x is PC4 and y is PC3. The earlier
    # version had the two titles swapped.
    x = "Technical-matter v. Procedural Commentary (PC4)",
    y = "Casual v. Formal Updates (PC3)"
  )
|
||||||
|
|
||||||
|
# Scatter of PC4 (x) against PC3 (y) restricted to rows with ADAC == "1",
# faceted by comment type (rows) and case (columns), filled by author
# affiliation.
main_df |>
  filter(ADAC == "1") |>
  ggplot(
    aes(
      x = PC4,
      y = PC3,
      fill = isAuthorWMF
    )
  ) +
  facet_grid(
    comment_type ~ source,
    # One lookup serves both facet variables, so it must list every value of
    # comment_type and of source; a value missing here gets an NA strip label.
    labeller = as_labeller(c(
      "c1" = "VisualEditor (c1)",
      "c2" = "HTTPS-as-default (c2)",
      "c3" = "HTTP-deprecation (c3)",
      "task_description" = "Task Description",
      "task_subcomment" = "Follow-up Reply"
    ))
  ) +
  geom_point(shape = 21, alpha = 0.3, size = 2) +
  scale_fill_viridis_d(
    name = "Comment Author Affiliation",
    # Labels are applied positionally to the sorted values of isAuthorWMF.
    # NOTE(review): presumably FALSE, then TRUE - confirm.
    labels = c("Nonaffiliated", "WMF-affiliated")
  ) +
  theme_minimal() +
  theme(legend.position = "top") +
  labs(
    title = "PCs for Pre-Resolution Comments Written by Task Author (by Author Affiliation, Case, and Comment Type)",
    # Axis titles follow the aes() mapping: x is PC4 and y is PC3. The earlier
    # version had the two titles swapped.
    x = "Technical-matter v. Procedural Commentary (PC4)",
    y = "Casual v. Formal Updates (PC3)"
  )
|
||||||
|
|
||||||
|
# Scatter of PC4 (x) against PC3 (y) for reply comments only, faceted by
# author affiliation (rows) and case (columns), filled by the ADAC flag.
main_df |>
  filter(comment_type == "task_subcomment") |>
  ggplot(
    aes(
      x = PC4,
      y = PC3,
      fill = as.factor(ADAC)
    )
  ) +
  facet_grid(
    isAuthorWMF ~ source,
    # Relabel only the `source` strips. A single as_labeller() lookup would
    # also be applied to isAuthorWMF, whose values are not in the table, and
    # those row strips would render as NA. Variables not named here keep
    # their raw values.
    labeller = labeller(
      source = c(
        "c1" = "VisualEditor (c1)",
        "c2" = "HTTPS-as-default (c2)",
        "c3" = "HTTP-deprecation (c3)"
      )
    )
  ) +
  geom_point(shape = 21, alpha = 0.13, size = 2) +
  scale_fill_viridis_d(
    option = "turbo",
    name = "By Task Author Before Resolution",
    # Labels are applied positionally to the sorted levels of as.factor(ADAC).
    # NOTE(review): presumably 0, then 1 - confirm.
    labels = c("No", "Yes")
  ) +
  theme_minimal() +
  theme(legend.position = "top") +
  labs(
    title = "PCs for Replies (by Author Affiliation, Case, and Comment Type)",
    # Axis titles follow the aes() mapping: x is PC4 and y is PC3. The earlier
    # version had the two titles swapped.
    x = "Technical-matter v. Procedural Commentary (PC4)",
    y = "Casual v. Formal Updates (PC3)"
  )
|
||||||
|
|
||||||
main_df <- main_df |>
|
main_df <- main_df |>
|
||||||
mutate(
|
mutate(
|
||||||
comment_wordcount = as.integer(stringr::str_count(tidyr::replace_na(as.character(comment_text), ""), "\\S+"))
|
comment_wordcount = as.integer(stringr::str_count(tidyr::replace_na(as.character(comment_text), ""), "\\S+"))
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user