Run the first DSL fit and investigate the FELM issue
This commit is contained in:
parent
fb490e37f5
commit
6092e21977
82
dsl/dsl.R
82
dsl/dsl.R
@ -4,51 +4,69 @@ library(dsl)
|
||||
dsl_csv <-"111725_DSL_frame.csv"
|
||||
dsl_df <- read.csv(dsl_csv, header = TRUE)
|
||||
|
||||
|
||||
power_model <- power_dsl(
|
||||
labeled_size = c(100, 200, 300, 600, 1000),
|
||||
base_model <- dsl(
|
||||
model = "logit",
|
||||
formula = dsl_score ~ human_SOL_prop_adac +
|
||||
median_gerrit_loc_delta + median_gerrit_reviewers +
|
||||
as.factor(isAuthorWMF) +
|
||||
as.factor(source) +
|
||||
median_PC3_adac +
|
||||
week_index,
|
||||
predicted_var = "human_SOL_prop_adac",
|
||||
prediction = "olmo_SOL_prop_adac",
|
||||
formula = dsl_score ~ human_EP_prop_adac,
|
||||
predicted_var = "human_EP_prop_adac",
|
||||
prediction = "olmo_EP_prop_adac",
|
||||
sample_prob = "sampling_prob",
|
||||
data=dsl_df
|
||||
)
|
||||
summary(power_model)
|
||||
plot(power_model, coef_name = "human_SOL_prop_adac")
|
||||
summary(base_model)
|
||||
|
||||
case_model <- dsl(
|
||||
model = "logit",
|
||||
formula = dsl_score ~ human_EP_prop_adac + as.factor(source),
|
||||
predicted_var = "human_EP_prop_adac",
|
||||
prediction = "olmo_EP_prop_adac",
|
||||
sample_prob = "sampling_prob",
|
||||
data=dsl_df
|
||||
)
|
||||
summary(case_model)
|
||||
|
||||
trial_model <- dsl(
|
||||
model = "logit",
|
||||
formula = dsl_score ~ human_TSOL_prop_adac +
|
||||
median_gerrit_loc_delta + median_gerrit_reviewers +
|
||||
as.factor(isAuthorWMF) +
|
||||
as.factor(author_closer) +
|
||||
median_PC4_adac +
|
||||
week_index + n_comments_before,
|
||||
predicted_var = "human_TSOL_prop_adac",
|
||||
prediction = "olmo_TSOL_prop_adac",
|
||||
formula = dsl_score ~ human_EP_prop_adac + human_TSOL_prop_adac + human_RK_prop_adac
|
||||
+ as.factor(source) + week_index + as.factor(isAuthorWMF) + median_PC4_adac + n_comments_before,
|
||||
predicted_var = c("human_EP_prop_adac", "human_TSOL_prop_adac", "human_RK_prop_adac"),
|
||||
prediction = c("olmo_EP_prop_adac", "olmo_TSOL_prop_adac", "olmo_RK_prop_adac"),
|
||||
sample_prob = "sampling_prob",
|
||||
data=dsl_df
|
||||
)
|
||||
summary(trial_model)
|
||||
|
||||
style_model <- dsl(
|
||||
model = "lm",
|
||||
formula = human_BE_prop ~
|
||||
median_PC1 + median_PC4 +
|
||||
as.factor(isAuthorWMF) +
|
||||
as.factor(author_closer) +
|
||||
median_PC3 +
|
||||
week_index,
|
||||
predicted_var = "human_BE_prop",
|
||||
prediction = "olmo_BE_prop",
|
||||
anova(dsl_df$olmo_RK_prop, dsl_df$median_gerrit_reviewers)
|
||||
chisq.test(table(dsl_df$isAuthorWMF, dsl_df$author_closer))
|
||||
|
||||
c1_df <- dsl_df |>
|
||||
dplyr::filter(source=="c1")
|
||||
|
||||
felm_model <- dsl(
|
||||
model = "felm",
|
||||
formula = TTR ~ human_EP_prop_adac + human_TSOL_prop_adac + human_RK_prop_adac
|
||||
+ week_index + as.factor(isAuthorWMF) + median_PC4_adac + n_comments_before,
|
||||
predicted_var = c("human_EP_prop_adac", "human_TSOL_prop_adac", "human_RK_prop_adac"),
|
||||
prediction = c("olmo_EP_prop_adac", "olmo_TSOL_prop_adac", "olmo_RK_prop_adac"),
|
||||
sample_prob = "sampling_prob",
|
||||
fixed_effect = "oneway",
|
||||
index = c("source"),
|
||||
cluster="source",
|
||||
data=dsl_df
|
||||
)
|
||||
summary(style_model)
|
||||
summary(felm_model)
|
||||
|
||||
#https://github.com/naoki-egami/dsl/blob/537664a54163dda52ee277071fdfd9e8df2572a6/R/estimate_g.R#L39
|
||||
felm_df <- dsl_df |>
|
||||
dplyr::mutate(ttr_days = TTR / 24)
|
||||
felm_model <- dsl(
|
||||
model = "felm",
|
||||
formula = ttr_days ~ human_EP_prop_adac,
|
||||
predicted_var = c("human_EP_prop_adac"),
|
||||
prediction = c("olmo_EP_prop_adac"),
|
||||
sample_prob = "sampling_prob",
|
||||
fixed_effect = "oneway",
|
||||
index = c("phase"),
|
||||
cluster="phase",
|
||||
data=felm_df
|
||||
)
|
||||
summary(felm_model)
|
||||
|
||||
@ -1,17 +0,0 @@
|
||||
1. Open an SSH tunnel from your workstation using the following command:
|
||||
|
||||
ssh -N -L 8787:n3439:35765 mjilg@klone.hyak.uw.edu
|
||||
|
||||
and point your web browser to http://localhost:8787
|
||||
|
||||
2. Log in to RStudio Server using the following credentials:
|
||||
|
||||
user: mjilg
|
||||
password: [REDACTED: this credential was committed in plain text and should be rotated]
|
||||
|
||||
When done using RStudio Server, terminate the job by:
|
||||
|
||||
1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window)
|
||||
2. Issue the following command on the login node:
|
||||
|
||||
scancel -f 31035935
|
||||
Loading…
Reference in New Issue
Block a user