57 lines
1.3 KiB
R
57 lines
1.3 KiB
R
library(tidyverse)
|
|
library(dsl)
|
|
|
|
# Load data ----
# Location of the CSV export that feeds every model in this script.
dsl_csv <- "~/dsl/102725_DSL_df_adac.csv"

# Read the CSV into a base data.frame; the first row supplies column names.
dsl_df <- utils::read.csv(file = dsl_csv, header = TRUE)
|
|
|
|
|
|
# Power analysis (SOL model) ----
# Run power_dsl() with a logit model over five labeled sizes (100 to 1000).
# `human_SOL_prop_adac` is passed as the predicted variable,
# `olmo_SOL_prop_adac` as the column holding its predictions, and
# `sampling_prob` as the sampling-probability column.
power_model <- power_dsl(
  data = dsl_df,
  model = "logit",
  formula = dsl_score ~
    human_SOL_prop_adac +
    median_gerrit_loc_delta +
    median_gerrit_reviewers +
    as.factor(isAuthorWMF) +
    as.factor(source) +
    median_PC3_adac +
    week_index,
  predicted_var = "human_SOL_prop_adac",
  prediction = "olmo_SOL_prop_adac",
  sample_prob = "sampling_prob",
  labeled_size = c(100, 200, 300, 600, 1000)
)

# Summarise the power analysis, then plot it for the SOL coefficient.
summary(power_model)
plot(power_model, coef_name = "human_SOL_prop_adac")
|
|
|
|
# Restrict to one source ----
# Keep only rows whose `source` equals "c1". The result overwrites `dsl_df`,
# so every statement below this point works on the subset, not the full table.
dsl_df <- filter(dsl_df, source == "c1")
|
|
|
|
# Trial model (BI, logit) ----
# Fit dsl() with a logit model on the current `dsl_df`. `human_BI_prop_adac`
# is passed as the predicted variable, `olmo_BI_prop_adac` as the column
# holding its predictions, and `sampling_prob` as the sampling-probability
# column.
trial_model <- dsl(
  data = dsl_df,
  model = "logit",
  formula = dsl_score ~
    human_BI_prop_adac +
    median_gerrit_loc_delta +
    median_gerrit_reviewers +
    as.factor(isAuthorWMF) +
    as.factor(author_closer) +
    median_PC4_adac +
    week_index,
  predicted_var = "human_BI_prop_adac",
  prediction = "olmo_BI_prop_adac",
  sample_prob = "sampling_prob"
)

summary(trial_model)
|
|
|
|
# Style model (BE, lm) ----
# Fit dsl() with a linear model on the current `dsl_df`. Here the predicted
# variable, `human_BE_prop`, is the outcome on the left-hand side of the
# formula; `olmo_BE_prop` is passed as the column holding its predictions.
# NOTE(review): the columns used here (`human_BE_prop`, `olmo_BE_prop`,
# `median_PC1`, `median_PC3`, `median_PC4`) carry no `_adac` suffix, unlike
# the variables in the other models of this script -- confirm they exist in
# `dsl_df` and that the unsuffixed versions are the ones intended.
style_model <- dsl(
  data = dsl_df,
  model = "lm",
  formula = human_BE_prop ~
    median_PC1 +
    median_PC4 +
    as.factor(isAuthorWMF) +
    as.factor(author_closer) +
    median_PC3 +
    week_index,
  predicted_var = "human_BE_prop",
  prediction = "olmo_BE_prop",
  sample_prob = "sampling_prob"
)

summary(style_model)
|