# mw-lifecycle-analysis/dsl/dsl.R

library(tidyverse)
library(dsl)
# Load data ----
# The CSV path is relative, so it resolves against the current working
# directory at the time the script runs.
dsl_csv <- "111725_DSL_frame.csv"
dsl_df <- read.csv(file = dsl_csv, header = TRUE)
# Power analysis ----
# Runs power_dsl() over the candidate labeled-sample sizes listed in
# `labeled_size`, with a logit specification of dsl_score.
# `predicted_var` and `prediction` pair the human_SOL_prop_adac column with
# olmo_SOL_prop_adac; `sample_prob` points at the sampling_prob column.
# NOTE(review): "human_*" presumably holds hand labels and "olmo_*" the
# automated predictions -- confirm against the data dictionary.
power_model <- power_dsl(
  labeled_size = c(100, 200, 300, 600, 1000),
  model = "logit",
  formula = dsl_score ~
    human_SOL_prop_adac +
    median_gerrit_loc_delta +
    median_gerrit_reviewers +
    as.factor(isAuthorWMF) +
    as.factor(source) +
    median_PC3_adac +
    week_index,
  predicted_var = "human_SOL_prop_adac",
  prediction = "olmo_SOL_prop_adac",
  sample_prob = "sampling_prob",
  data = dsl_df
)

# Report the power-analysis results, then plot them for the coefficient on
# the predicted variable.
summary(power_model)
plot(power_model, coef_name = "human_SOL_prop_adac")
# Trial model ----
# Logit dsl() fit of dsl_score. Compared with the power-analysis formula it
# uses human_TSOL_prop_adac as the predicted variable, author_closer instead
# of source, median_PC4_adac instead of median_PC3_adac, and additionally
# controls for n_comments_before.
trial_model <- dsl(
  model = "logit",
  formula = dsl_score ~
    human_TSOL_prop_adac +
    median_gerrit_loc_delta +
    median_gerrit_reviewers +
    as.factor(isAuthorWMF) +
    as.factor(author_closer) +
    median_PC4_adac +
    week_index +
    n_comments_before,
  predicted_var = "human_TSOL_prop_adac",
  prediction = "olmo_TSOL_prop_adac",
  sample_prob = "sampling_prob",
  data = dsl_df
)

summary(trial_model)
# Style model ----
# Linear ("lm") dsl() fit in which the predicted variable is the outcome
# itself: human_BE_prop is the response and olmo_BE_prop is supplied as its
# prediction column.
style_model <- dsl(
  model = "lm",
  formula = human_BE_prop ~
    median_PC1 +
    median_PC4 +
    as.factor(isAuthorWMF) +
    as.factor(author_closer) +
    median_PC3 +
    week_index,
  predicted_var = "human_BE_prop",
  prediction = "olmo_BE_prop",
  sample_prob = "sampling_prob",
  data = dsl_df
)

summary(style_model)