# DSL analysis script ----
# Reads the frame in "111725_DSL_frame.csv" and fits a sequence of models with
# dsl::dsl(). In every model the human_* columns are passed as `predicted_var`,
# the matching olmo_* columns as `prediction`, and "sampling_prob" as
# `sample_prob`. Each fit is printed with summary().

library(tidyverse)
library(dsl)

# Load data ----
dsl_csv <- "111725_DSL_frame.csv"
dsl_df <- read.csv(dsl_csv, header = TRUE)

# Logit models of dsl_score ----

# Baseline: dsl_score on the EP proportion only.
base_model <- dsl(
  model = "logit",
  formula = dsl_score ~ human_EP_prop_adac,
  predicted_var = "human_EP_prop_adac",
  prediction = "olmo_EP_prop_adac",
  sample_prob = "sampling_prob",
  data = dsl_df
)
summary(base_model)

# Baseline plus `source` entered as a factor.
case_model <- dsl(
  model = "logit",
  formula = dsl_score ~ human_EP_prop_adac + as.factor(source),
  predicted_var = "human_EP_prop_adac",
  prediction = "olmo_EP_prop_adac",
  sample_prob = "sampling_prob",
  data = dsl_df
)
summary(case_model)

# Full specification: three human-coded proportions (EP, TSOL, RK) plus
# source, week_index, isAuthorWMF, median_PC4_adac and n_comments_before.
trial_model <- dsl(
  model = "logit",
  formula = dsl_score ~ human_EP_prop_adac + human_TSOL_prop_adac +
    human_RK_prop_adac + as.factor(source) + week_index +
    as.factor(isAuthorWMF) + median_PC4_adac + n_comments_before,
  predicted_var = c(
    "human_EP_prop_adac", "human_TSOL_prop_adac", "human_RK_prop_adac"
  ),
  prediction = c(
    "olmo_EP_prop_adac", "olmo_TSOL_prop_adac", "olmo_RK_prop_adac"
  ),
  sample_prob = "sampling_prob",
  data = dsl_df
)
summary(trial_model)

# Exploratory association checks ----

# anova() is a generic for fitted-model objects and has no method for bare
# numeric vectors, so calling it directly on two columns stops the script with
# "no applicable method". Fit a linear model on the two columns first and run
# the F-test on that fit.
# `$` access is kept on purpose so column lookup behaves exactly as in the
# earlier direct call (including data.frame partial name matching).
# NOTE(review): treating olmo_RK_prop as the response is an assumption --
# confirm the intended direction (or whether cor.test() was meant).
anova(lm(dsl_df$olmo_RK_prop ~ dsl_df$median_gerrit_reviewers))

# Test of independence between isAuthorWMF and author_closer.
chisq.test(table(dsl_df$isAuthorWMF, dsl_df$author_closer))

# Rows whose source is "c1".
# NOTE(review): c1_df is not used by any model visible in this script; the
# models below are fit on dsl_df / felm_df. Confirm whether it was meant to
# feed one of them.
c1_df <- dsl_df |>
  dplyr::filter(source == "c1")

# Fixed-effects models of TTR ----

# TTR on the three proportions and controls, with one-way fixed effects
# indexed by `source` and clustering on `source`.
felm_model <- dsl(
  model = "felm",
  formula = TTR ~ human_EP_prop_adac + human_TSOL_prop_adac +
    human_RK_prop_adac + week_index + as.factor(isAuthorWMF) +
    median_PC4_adac + n_comments_before,
  predicted_var = c(
    "human_EP_prop_adac", "human_TSOL_prop_adac", "human_RK_prop_adac"
  ),
  prediction = c(
    "olmo_EP_prop_adac", "olmo_TSOL_prop_adac", "olmo_RK_prop_adac"
  ),
  sample_prob = "sampling_prob",
  fixed_effect = "oneway",
  index = c("source"),
  cluster = "source",
  data = dsl_df
)
summary(felm_model)

# Reference kept from the original script (dsl package source):
# https://github.com/naoki-egami/dsl/blob/537664a54163dda52ee277071fdfd9e8df2572a6/R/estimate_g.R#L39

# Rescale TTR by 24.
# NOTE(review): presumably TTR is in hours, making ttr_days a day count --
# confirm the unit of TTR.
felm_df <- dsl_df |>
  dplyr::mutate(ttr_days = TTR / 24)

# Simplified model: ttr_days on the EP proportion only, with one-way fixed
# effects indexed by `phase` and clustering on `phase`.
# Note: this reassigns felm_model, replacing the source-indexed fit above.
felm_model <- dsl(
  model = "felm",
  formula = ttr_days ~ human_EP_prop_adac,
  predicted_var = c("human_EP_prop_adac"),
  prediction = c("olmo_EP_prop_adac"),
  sample_prob = "sampling_prob",
  fixed_effect = "oneway",
  index = c("phase"),
  cluster = "phase",
  data = felm_df
)
summary(felm_model)