# 1
# 0
# mw-lifecycle-analysis/dsl/dsl.R
# 2025-11-17 14:03:35 -08:00
#
# 73 lines
# 2.2 KiB
# R

library(tidyverse)
library(dsl)
# Load the DSL analysis frame from a CSV given by a relative path; the first
# row of the file supplies the column names.
dsl_csv <- "111725_DSL_frame.csv"
dsl_df <- read.csv(file = dsl_csv, header = TRUE)
# Baseline DSL logit: dsl_score regressed on human_EP_prop_adac alone.
# `predicted_var` names the human_* column and `prediction` its olmo_*
# counterpart; `sample_prob` names the sampling-probability column.
base_model <- dsl(
  data = dsl_df,
  model = "logit",
  formula = dsl_score ~ human_EP_prop_adac,
  predicted_var = "human_EP_prop_adac",
  prediction = "olmo_EP_prop_adac",
  sample_prob = "sampling_prob"
)
summary(base_model)
# DSL logit with a categorical `source` term: same outcome and EP predictor
# as a single-predictor fit, plus as.factor(source) on the right-hand side.
case_model <- dsl(
  data = dsl_df,
  model = "logit",
  formula = dsl_score ~ human_EP_prop_adac + as.factor(source),
  predicted_var = "human_EP_prop_adac",
  prediction = "olmo_EP_prop_adac",
  sample_prob = "sampling_prob"
)
summary(case_model)
# Full DSL logit: dsl_score on the three human_* proportions (EP, TSOL, RK)
# plus source, week index, author-WMF flag, median_PC4_adac and prior
# comment count. Each entry of `predicted_var` is paired positionally with
# the olmo_* column at the same position in `prediction`.
trial_model <- dsl(
  data = dsl_df,
  model = "logit",
  formula = dsl_score ~
    human_EP_prop_adac + human_TSOL_prop_adac + human_RK_prop_adac +
    as.factor(source) + week_index + as.factor(isAuthorWMF) +
    median_PC4_adac + n_comments_before,
  predicted_var = c(
    "human_EP_prop_adac", "human_TSOL_prop_adac", "human_RK_prop_adac"
  ),
  prediction = c(
    "olmo_EP_prop_adac", "olmo_TSOL_prop_adac", "olmo_RK_prop_adac"
  ),
  sample_prob = "sampling_prob"
)
summary(trial_model)
# Ad-hoc association checks between columns of dsl_df.
#
# anova() dispatches on fitted-model objects and has no method for bare
# numeric vectors, so passing two columns directly raises "no applicable
# method" and aborts a sourced run. The two columns are therefore related
# through lm() first and the fitted model is handed to anova().
# NOTE(review): the dsl() calls in this script use `_adac`-suffixed columns;
# confirm `olmo_RK_prop` is the column intended here, and that it should be
# the response rather than the predictor.
anova(lm(dsl_df$olmo_RK_prop ~ dsl_df$median_gerrit_reviewers))
# Chi-squared test on the isAuthorWMF x author_closer contingency table.
chisq.test(table(dsl_df$isAuthorWMF, dsl_df$author_closer))
# Subset of dsl_df keeping only rows whose `source` equals "c1".
# NOTE(review): no later statement visible in this script reads c1_df;
# confirm whether a model was meant to be fitted on it.
c1_df <- dplyr::filter(dsl_df, source == "c1")
# DSL "felm" fit of TTR on the three human_* proportions plus controls.
# One-way fixed effects are indexed by `source`, and `cluster` is set to the
# same column. The fit uses the full dsl_df.
felm_model <- dsl(
  data = dsl_df,
  model = "felm",
  formula = TTR ~
    human_EP_prop_adac + human_TSOL_prop_adac + human_RK_prop_adac +
    week_index + as.factor(isAuthorWMF) + median_PC4_adac +
    n_comments_before,
  predicted_var = c(
    "human_EP_prop_adac", "human_TSOL_prop_adac", "human_RK_prop_adac"
  ),
  prediction = c(
    "olmo_EP_prop_adac", "olmo_TSOL_prop_adac", "olmo_RK_prop_adac"
  ),
  sample_prob = "sampling_prob",
  fixed_effect = "oneway",
  index = "source",
  cluster = "source"
)
summary(felm_model)
# Reference (dsl package source, R/estimate_g.R, line 39):
# https://github.com/naoki-egami/dsl/blob/537664a54163dda52ee277071fdfd9e8df2572a6/R/estimate_g.R#L39
# Add `ttr_days` = TTR / 24 to a copy of dsl_df.
# NOTE(review): presumably converts hours to days; confirm the units of TTR.
felm_df <- dplyr::mutate(dsl_df, ttr_days = TTR / 24)
# DSL "felm" fit of ttr_days on the EP proportion only, with one-way fixed
# effects indexed by `phase` and `cluster` set to the same column. The
# result is stored under the name felm_model, replacing any earlier value
# bound to that name.
felm_model <- dsl(
  data = felm_df,
  model = "felm",
  formula = ttr_days ~ human_EP_prop_adac,
  predicted_var = "human_EP_prop_adac",
  prediction = "olmo_EP_prop_adac",
  sample_prob = "sampling_prob",
  fixed_effect = "oneway",
  index = "phase",
  cluster = "phase"
)
summary(felm_model)