# DSL analysis script: loads the labeled/predicted data set, runs a power
# analysis, then fits two dsl() models on the "c1" subset.
# Each statement sits on its own line; R cannot parse several statements on
# one line unless they are separated by `;`.

library(tidyverse)
library(dsl)

# Load data ----
dsl_csv <- "~/dsl/102725_DSL_df_adac.csv"
dsl_df <- read.csv(dsl_csv, header = TRUE)

# Power analysis ----
# Evaluates the logit specification at several labeled-sample sizes; see the
# dsl package documentation for the exact semantics of `labeled_size`.
power_model <- power_dsl(
  labeled_size = c(100, 200, 300, 600, 1000),
  model = "logit",
  formula = dsl_score ~ human_SOL_prop_adac +
    median_gerrit_loc_delta +
    median_gerrit_reviewers +
    as.factor(isAuthorWMF) +
    as.factor(source) +
    median_PC3_adac +
    week_index,
  predicted_var = "human_SOL_prop_adac",
  prediction = "olmo_SOL_prop_adac",
  sample_prob = "sampling_prob",
  data = dsl_df
)
summary(power_model)
plot(power_model, coef_name = "human_SOL_prop_adac")

# Restrict to source "c1" ----
# NOTE: this overwrites dsl_df, so every model below (and any later code)
# sees only the rows where source == "c1".
dsl_df <- dsl_df |>
  filter(source == "c1")

# Trial model (logit) ----
trial_model <- dsl(
  model = "logit",
  formula = dsl_score ~ human_BI_prop_adac +
    median_gerrit_loc_delta +
    median_gerrit_reviewers +
    as.factor(isAuthorWMF) +
    as.factor(author_closer) +
    median_PC4_adac +
    week_index,
  predicted_var = "human_BI_prop_adac",
  prediction = "olmo_BI_prop_adac",
  sample_prob = "sampling_prob",
  data = dsl_df
)
summary(trial_model)

# Style model (lm) ----
# NOTE(review): unlike the models above, the columns referenced here
# (human_BE_prop, olmo_BE_prop, median_PC1/PC3/PC4) carry no `_adac` suffix.
# Confirm these columns exist in the CSV and that the difference is intended.
style_model <- dsl(
  model = "lm",
  formula = human_BE_prop ~ median_PC1 +
    median_PC4 +
    as.factor(isAuthorWMF) +
    as.factor(author_closer) +
    median_PC3 +
    week_index,
  predicted_var = "human_BE_prop",
  prediction = "olmo_BE_prop",
  sample_prob = "sampling_prob",
  data = dsl_df
)
summary(style_model)