1
0
mw-lifecycle-analysis/dsl/dsl.R
2025-11-03 10:04:42 -08:00

57 lines
1.3 KiB
R

library(tidyverse)
library(dsl)
# Input data ----
# Path of the CSV that feeds every model in this script.
dsl_csv <- "~/dsl/102725_DSL_df_adac.csv"

# Read the CSV into a base data.frame. The first row is treated as column
# names; that is read.csv()'s default, so `header` is left implicit.
dsl_df <- read.csv(file = dsl_csv)
# Power analysis ----
# power_dsl() is run at five labeled-sample sizes for a logit model of
# `dsl_score`. `human_SOL_prop_adac` is declared as the predicted variable,
# `olmo_SOL_prop_adac` is the column holding its predictions, and
# `sampling_prob` is passed as the sample_prob column.
power_model <- power_dsl(
  data = dsl_df,
  model = "logit",
  formula = dsl_score ~
    human_SOL_prop_adac +
    median_gerrit_loc_delta +
    median_gerrit_reviewers +
    as.factor(isAuthorWMF) +
    as.factor(source) +
    median_PC3_adac +
    week_index,
  predicted_var = "human_SOL_prop_adac",
  prediction = "olmo_SOL_prop_adac",
  sample_prob = "sampling_prob",
  labeled_size = c(100, 200, 300, 600, 1000)
)

# Report the result, then plot it for the `human_SOL_prop_adac` coefficient.
summary(power_model)
plot(power_model, coef_name = "human_SOL_prop_adac")
# Subset to one source ----
# Keep only the rows whose `source` equals "c1". `dsl_df` is overwritten, so
# everything that runs after this line works on the "c1" subset only.
dsl_df <- filter(dsl_df, source == "c1")
# Trial model ----
# dsl() logit fit of `dsl_score` on the current `dsl_df`.
# `human_BI_prop_adac` is the predicted variable, `olmo_BI_prop_adac` is the
# column with its predictions, and `sampling_prob` is the sample_prob column.
trial_model <- dsl(
  data = dsl_df,
  model = "logit",
  formula = dsl_score ~
    human_BI_prop_adac +
    median_gerrit_loc_delta +
    median_gerrit_reviewers +
    as.factor(isAuthorWMF) +
    as.factor(author_closer) +
    median_PC4_adac +
    week_index,
  predicted_var = "human_BI_prop_adac",
  prediction = "olmo_BI_prop_adac",
  sample_prob = "sampling_prob"
)

# Report the fitted model.
summary(trial_model)
# Style model ----
# dsl() linear ("lm") fit on the current `dsl_df`. Here the predicted
# variable, `human_BE_prop`, is also the left-hand side of the formula;
# `olmo_BE_prop` is the column with its predictions and `sampling_prob` is
# the sample_prob column.
style_model <- dsl(
  data = dsl_df,
  model = "lm",
  formula = human_BE_prop ~
    median_PC1 +
    median_PC4 +
    as.factor(isAuthorWMF) +
    as.factor(author_closer) +
    median_PC3 +
    week_index,
  predicted_var = "human_BE_prop",
  prediction = "olmo_BE_prop",
  sample_prob = "sampling_prob"
)

# Report the fitted model.
summary(style_model)