55 lines
1.3 KiB
R
55 lines
1.3 KiB
R
# Packages ----
# dsl provides power_dsl() and dsl(), which are called below.
library(tidyverse)
library(dsl)

# Input data ----
# Relative path: the CSV is looked up in the current working directory.
dsl_csv <- "111725_DSL_frame.csv"

# Every column named in the model formulas below, plus the prediction and
# "sampling_prob" columns, is read from this data frame.
dsl_df <- read.csv(dsl_csv, header = TRUE)

# Power analysis ----
# Calls power_dsl() with a logit specification of dsl_score, evaluated at five
# candidate labeled-sample sizes (100 through 1000). Apart from labeled_size,
# the arguments are the same kind of specification given to dsl() elsewhere
# in this script.
#
# predicted_var names the formula term "human_SOL_prop_adac"; prediction names
# the column "olmo_SOL_prop_adac" paired with it; sample_prob names the column
# "sampling_prob".
# NOTE(review): presumably the human_* columns are expert-coded values and the
# olmo_* columns are model-generated predictions of them -- confirm against
# the data dictionary.
# NOTE(review): this specification uses the SOL measure, as.factor(source) and
# median_PC3_adac, whereas trial_model uses the TSOL measure,
# as.factor(author_closer), median_PC4_adac and n_comments_before. Confirm the
# difference is intentional.
power_model <- power_dsl(
  labeled_size = c(100, 200, 300, 600, 1000),
  model = "logit",
  formula = dsl_score ~ human_SOL_prop_adac +
    median_gerrit_loc_delta + median_gerrit_reviewers +
    as.factor(isAuthorWMF) +
    as.factor(source) +
    median_PC3_adac +
    week_index,
  predicted_var = "human_SOL_prop_adac",
  prediction = "olmo_SOL_prop_adac",
  sample_prob = "sampling_prob",
  data = dsl_df
)

# Tabular summary, then a plot restricted to the coefficient named
# "human_SOL_prop_adac".
summary(power_model)
plot(power_model, coef_name = "human_SOL_prop_adac")

# Trial model ----
# Fits dsl() with a logit specification of dsl_score on the TSOL measure plus
# Gerrit, author, principal-component, week and comment-count covariates.
#
# predicted_var names the formula term "human_TSOL_prop_adac"; prediction
# names the column "olmo_TSOL_prop_adac" paired with it; sample_prob names the
# column "sampling_prob".
# NOTE(review): presumably human_* holds expert-coded values and olmo_* holds
# model-generated predictions of them -- confirm against the data dictionary.
trial_model <- dsl(
  model = "logit",
  formula = dsl_score ~ human_TSOL_prop_adac +
    median_gerrit_loc_delta + median_gerrit_reviewers +
    as.factor(isAuthorWMF) +
    as.factor(author_closer) +
    median_PC4_adac +
    week_index + n_comments_before,
  predicted_var = "human_TSOL_prop_adac",
  prediction = "olmo_TSOL_prop_adac",
  sample_prob = "sampling_prob",
  data = dsl_df
)
summary(trial_model)

# Style model ----
# Fits dsl() with a linear ("lm") specification. Unlike the two logit models
# in this script, the variable named in predicted_var ("human_BE_prop") is the
# left-hand side of the formula, not one of the right-hand-side terms.
#
# prediction names the column "olmo_BE_prop" paired with that outcome;
# sample_prob names the column "sampling_prob".
# NOTE(review): the principal-component covariates here (median_PC1,
# median_PC4, median_PC3) carry no "_adac" suffix, unlike those in the logit
# models -- confirm these are the intended columns.
style_model <- dsl(
  model = "lm",
  formula = human_BE_prop ~
    median_PC1 + median_PC4 +
    as.factor(isAuthorWMF) +
    as.factor(author_closer) +
    median_PC3 +
    week_index,
  predicted_var = "human_BE_prop",
  prediction = "olmo_BE_prop",
  sample_prob = "sampling_prob",
  data = dsl_df
)
summary(style_model)