101 lines
4.2 KiB
R
101 lines
4.2 KiB
R
library(tidyverse)
|
|
library(dsl)
|
|
|
|
# Load the analysis frame and add the two derived columns used downstream:
#   ttr_days        = TTR_hours / 24
#   task_resolution = copy of dsl_score (the outcome named in the model formula)
dsl_csv <- "~/dsl/121625_DSL_frame.csv"

dsl_df <- dplyr::mutate(
  read.csv(dsl_csv, header = TRUE),
  ttr_days = TTR_hours / 24,
  task_resolution = dsl_score
)
|
|
|
|
# Fit the DSL logit model of task resolution, or reuse a cached fit.
#
# Previously the model was refit on every run (3-fold cross-fitting repeated
# over 20 sample splits) and the result was then unconditionally overwritten
# by readRDS("dsl/121625_logit_dsl.RDS"), while the only saveRDS() call was
# commented out and pointed at a different path. The fit is now cached at the
# path the script reads from: it is loaded when the file exists, otherwise it
# is fitted and written there.
# Delete the file to force a refit after changing the data or the formula.
dev_model_rds <- "dsl/121625_logit_dsl.RDS"

if (file.exists(dev_model_rds)) {
  dev_model <- readRDS(dev_model_rds)
} else {
  dev_model <- dsl(
    model = "logit",
    formula = task_resolution ~ human_EP_prop_adac + human_TSOL_prop_adac +
      human_RK_prop_adac + median_PC4_adac + median_PC3_adac +
      n_comments_before + median_gerrit_reviewers + week_index +
      as.factor(isAuthorWMF) * as.factor(source),
    # `predicted_var` and `prediction` are paired by position: each human_*
    # column is matched with its olmo_* counterpart.
    # NOTE(review): presumably human_* are hand-coded labels and olmo_* are
    # model predictions of the same quantities -- confirm against the data.
    predicted_var = c(
      "human_EP_prop_adac", "human_TSOL_prop_adac", "human_RK_prop_adac"
    ),
    prediction = c(
      "olmo_EP_prop_adac", "olmo_TSOL_prop_adac", "olmo_RK_prop_adac"
    ),
    sample_prob = "sampling_prob",
    cluster = "source",
    cross_fit = 3,
    sample_split = 20,
    data = dsl_df
  )

  # Make sure the cache directory exists before writing the fit.
  dir.create(dirname(dev_model_rds), recursive = TRUE, showWarnings = FALSE)
  saveRDS(dev_model, dev_model_rds)
}

summary(dev_model)
|
|
library(broom)
|
|
library(dplyr)
|
|
#' Tidy a fitted `dsl` model into a broom-style coefficient table
#'
#' @param x Fitted model object; passed to `dsl:::summary.dsl()` as `object`.
#' @param conf.int If `TRUE`, also return `conf.low` and `conf.high`, taken
#'   from the summary table's "CI Lower" / "CI Upper" columns.
#' @param conf.level Passed to `dsl:::summary.dsl()` as its `ci` argument.
#' @param exponentiate If `TRUE`, the table is run through
#'   `broom:::exponentiate()` before being returned.
#' @param ... Forwarded to `dsl:::summary.dsl()`.
#' @return A tibble with columns `term`, `estimate`, `std.error`, `p.value`
#'   and, when `conf.int = TRUE`, `conf.low` and `conf.high`.
tidy.dsl <- function(x, conf.int = FALSE, conf.level = 0.95, exponentiate = FALSE, ...) {
  # Messages emitted while summarising are suppressed.
  coef_table <- suppressMessages(
    dsl:::summary.dsl(object = x, ci = conf.level, ...)
  )

  # broom-style column name -> column name used in the summary table.
  wanted <- c(
    "estimate" = "Estimate",
    "std.error" = "Std. Error",
    "p.value" = "p value",
    if (conf.int) c("conf.low" = "CI Lower", "conf.high" = "CI Upper")
  )

  picked <- stats::setNames(as.list(coef_table)[wanted], names(wanted))

  tidied <- dplyr::bind_cols(
    term = row.names(coef_table),
    as_tibble(as.data.frame(picked))
  )

  if (exponentiate) {
    tidied <- broom:::exponentiate(tidied)
  }

  tidied
}
|
|
# Build the plotting table: tidy the fitted model, replace raw coefficient
# names with display labels, and turn `term` into a factor whose level order
# fixes the row order on the plot's y axis. The order is reversed so the first
# entry of `display_order` is the last factor level.
# Terms without an entry in `term_labels` keep their raw name from recode()
# and therefore become NA in the factor, since they are not among its levels.
coef_df <- local({
  # Raw model term -> display label.
  term_labels <- c(
    "week_index" = "Weeks from deployment",
    "(Intercept)" = "Intercept",
    "n_comments_before" = "# of comments prior to resolution",
    "median_PC4_adac" = "Median Author PC4 Pre-resolution",
    "median_PC3_adac" = "Median Author PC3 Pre-resolution",
    "median_gerrit_reviewers" = "Median # of Code Reviewers (Gerrit)",
    "human_TSOL_prop_adac" = "% of sentences discussing 'Solutions'",
    "human_RK_prop_adac" = "% of sentences discussing 'Record Keeping'",
    "human_EP_prop_adac" = "% of sentences discussing 'Existent Problems'",
    "as.factor(source)c3" = "HTTP-deprecation (factor)",
    "as.factor(source)c2" = "HTTPS-login (factor)",
    "as.factor(isAuthorWMF)TRUE" = "WMF-affiliated Author (factor)",
    "as.factor(isAuthorWMF)FALSE" = "Nonaffiliated Author (factor)",
    "as.factor(isAuthorWMF)FALSE:as.factor(source)c2" = "Nonaffiliated Author:HTTPS-login",
    "as.factor(isAuthorWMF)FALSE:as.factor(source)c3" = "Nonaffiliated Author:HTTP-deprecation",
    "as.factor(isAuthorWMF)TRUE:as.factor(source)c2" = "WMF-affiliated Author:HTTPS-login",
    "as.factor(isAuthorWMF)TRUE:as.factor(source)c3" = "WMF-affiliated Author:HTTP-deprecation"
  )

  # Display labels in the intended reading order.
  display_order <- c(
    "Intercept",
    "% of sentences discussing 'Existent Problems'",
    "% of sentences discussing 'Solutions'",
    "% of sentences discussing 'Record Keeping'",
    "Median Author PC4 Pre-resolution",
    "Median Author PC3 Pre-resolution",
    "# of comments prior to resolution",
    "Median # of Code Reviewers (Gerrit)",
    "Weeks from deployment",
    "HTTPS-login (factor)",
    "HTTP-deprecation (factor)",
    "Nonaffiliated Author (factor)",
    "WMF-affiliated Author (factor)",
    "Nonaffiliated Author:HTTPS-login",
    "WMF-affiliated Author:HTTPS-login",
    "Nonaffiliated Author:HTTP-deprecation",
    "WMF-affiliated Author:HTTP-deprecation"
  )

  tidy.dsl(dev_model) |>
    mutate(
      term = factor(recode(term, !!!term_labels), levels = rev(display_order))
    )
})
|
|
# Coefficient plot: one row per model term, showing the point estimate and an
# interval of estimate +/- 1.96 * std.error. The dashed red line marks zero.
dsl_coefs <- ggplot(coef_df, aes(x = estimate, y = term)) +
  geom_point(size = 1) +
  # Mapping xmin/xmax makes geom_errorbar() draw horizontal bars. Its cap size
  # is set with `width`; the `height` argument used before belongs to
  # geom_errorbarh() and was dropped as an unknown parameter, so the intended
  # 0.2 cap size never took effect.
  geom_errorbar(
    aes(
      xmin = estimate - 1.96 * std.error,
      xmax = estimate + 1.96 * std.error
    ),
    width = 0.2
  ) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "red") +
  labs(
    x = "Log-odds Coefficient Estimate",
    y = "Variable"
  ) +
  theme_minimal()

# Display the plot on the active graphics device.
dsl_coefs
|
|
# Write the coefficient plot to disk as a 6 x 6 inch PNG at 800 dpi.
ggsave(
  "121625_dsl_coefs.png",
  dsl_coefs,
  width = 6, height = 6, units = "in",
  dpi = 800
)
|