adding next iteration of model fit to dsl
This commit is contained in:
parent
13d2113b73
commit
495be027e7
33
dsl/dsl.R
33
dsl/dsl.R
@ -4,6 +4,10 @@ library(dsl)
|
|||||||
dsl_csv <-"111725_DSL_frame.csv"
|
dsl_csv <-"111725_DSL_frame.csv"
|
||||||
dsl_df <- read.csv(dsl_csv, header = TRUE)
|
dsl_df <- read.csv(dsl_csv, header = TRUE)
|
||||||
|
|
||||||
|
dsl_df <- dsl_df |>
|
||||||
|
dplyr::mutate(ttr_days = TTR / 24) |>
|
||||||
|
dplyr::mutate(task_resolution = dsl_score)
|
||||||
|
|
||||||
base_model <- dsl(
|
base_model <- dsl(
|
||||||
model = "logit",
|
model = "logit",
|
||||||
formula = dsl_score ~ human_EP_prop_adac,
|
formula = dsl_score ~ human_EP_prop_adac,
|
||||||
@ -27,7 +31,8 @@ summary(case_model)
|
|||||||
logit_model <- dsl(
|
logit_model <- dsl(
|
||||||
model = "logit",
|
model = "logit",
|
||||||
formula = dsl_score ~ human_EP_prop_adac + human_TSOL_prop_adac + human_RK_prop_adac
|
formula = dsl_score ~ human_EP_prop_adac + human_TSOL_prop_adac + human_RK_prop_adac
|
||||||
+ week_index + as.factor(isAuthorWMF) + median_PC4_adac + n_comments_before + as.factor(source),
|
+ week_index + as.factor(isAuthorWMF) + median_PC4_adac + median_PC3_adac + n_comments_before + as.factor(source) +
|
||||||
|
median_gerrit_reviewers,
|
||||||
predicted_var = c("human_EP_prop_adac", "human_TSOL_prop_adac", "human_RK_prop_adac"),
|
predicted_var = c("human_EP_prop_adac", "human_TSOL_prop_adac", "human_RK_prop_adac"),
|
||||||
prediction = c("olmo_EP_prop_adac", "olmo_TSOL_prop_adac", "olmo_RK_prop_adac"),
|
prediction = c("olmo_EP_prop_adac", "olmo_TSOL_prop_adac", "olmo_RK_prop_adac"),
|
||||||
sample_prob = "sampling_prob",
|
sample_prob = "sampling_prob",
|
||||||
@ -39,15 +44,14 @@ logit_model <- dsl(
|
|||||||
summary(logit_model)
|
summary(logit_model)
|
||||||
#anova(dsl_df$olmo_RK_prop, dsl_df$median_gerrit_reviewers)
|
#anova(dsl_df$olmo_RK_prop, dsl_df$median_gerrit_reviewers)
|
||||||
#chisq.test(table(dsl_df$isAuthorWMF, dsl_df$author_closer))
|
#chisq.test(table(dsl_df$isAuthorWMF, dsl_df$author_closer))
|
||||||
felm_df <- dsl_df |>
|
|
||||||
dplyr::mutate(ttr_days = TTR / 24)
|
|
||||||
# https://cscu.cornell.edu/wp-content/uploads/clust.pdf
|
# https://cscu.cornell.edu/wp-content/uploads/clust.pdf
|
||||||
# https://statmodeling.stat.columbia.edu/2020/01/10/linear-or-logistic-regression-with-binary-outcomes/
|
# https://statmodeling.stat.columbia.edu/2020/01/10/linear-or-logistic-regression-with-binary-outcomes/
|
||||||
# https://osf.io/preprints/psyarxiv/4gmbv_v1
|
# https://osf.io/preprints/psyarxiv/4gmbv_v1
|
||||||
felm_model <- dsl(
|
felm_model <- dsl(
|
||||||
model = "felm",
|
model = "felm",
|
||||||
formula = dsl_score ~ human_EP_prop_adac + human_TSOL_prop_adac + human_RK_prop_adac +
|
formula = dsl_score ~ human_EP_prop_adac + human_TSOL_prop_adac + human_RK_prop_adac +
|
||||||
week_index + median_PC4_adac + n_comments_before + + isAuthorWMF + median_gerrit_reviewers,
|
phase
|
||||||
|
+ median_PC4_adac + median_PC3_adac + n_comments_before + + isAuthorWMF,
|
||||||
predicted_var = c("human_EP_prop_adac", "human_TSOL_prop_adac", "human_RK_prop_adac"),
|
predicted_var = c("human_EP_prop_adac", "human_TSOL_prop_adac", "human_RK_prop_adac"),
|
||||||
prediction = c("olmo_EP_prop_adac", "olmo_TSOL_prop_adac", "olmo_RK_prop_adac"),
|
prediction = c("olmo_EP_prop_adac", "olmo_TSOL_prop_adac", "olmo_RK_prop_adac"),
|
||||||
sample_prob = "sampling_prob",
|
sample_prob = "sampling_prob",
|
||||||
@ -59,8 +63,25 @@ felm_model <- dsl(
|
|||||||
data=felm_df
|
data=felm_df
|
||||||
)
|
)
|
||||||
summary(felm_model)
|
summary(felm_model)
|
||||||
|
#https://github.com/naoki-egami/dsl/blob/537664a54163dda52ee277071fdfd9e8df2572a6/R/estimate_g.R#L39
|
||||||
|
|
||||||
|
|
||||||
|
dev_model <- dsl(
|
||||||
|
model = "logit",
|
||||||
|
formula = task_resolution ~ human_EP_prop_adac + human_TSOL_prop_adac + human_RK_prop_adac
|
||||||
|
+ median_PC4_adac + median_PC3_adac + n_comments_before
|
||||||
|
+ median_gerrit_reviewers + median_gerrit_loc_delta
|
||||||
|
+ week_index + as.factor(source) * as.factor(isAuthorWMF),
|
||||||
|
predicted_var = c("human_EP_prop_adac", "human_TSOL_prop_adac", "human_RK_prop_adac"),
|
||||||
|
prediction = c("olmo_EP_prop_adac", "olmo_TSOL_prop_adac", "olmo_RK_prop_adac"),
|
||||||
|
sample_prob = "sampling_prob",
|
||||||
|
cluster="source",
|
||||||
|
cross_fit = 3,
|
||||||
|
sample_split = 20,
|
||||||
|
data=dsl_df
|
||||||
|
)
|
||||||
|
summary(dev_model)
|
||||||
|
|
||||||
#https://github.com/naoki-egami/dsl/blob/537664a54163dda52ee277071fdfd9e8df2572a6/R/estimate_g.R#L39
|
|
||||||
|
|
||||||
library(broom)
|
library(broom)
|
||||||
library(dplyr)
|
library(dplyr)
|
||||||
@ -79,7 +100,7 @@ tidy.dsl <- function(x, conf.int = FALSE, conf.level = 0.95, exponentiate = FALS
|
|||||||
out <- broom:::exponentiate(out)
|
out <- broom:::exponentiate(out)
|
||||||
return(out)
|
return(out)
|
||||||
}
|
}
|
||||||
coef_df <- tidy.dsl(felm_model)
|
coef_df <- tidy.dsl(dev_model)
|
||||||
ggplot(coef_df, aes(x = estimate, y = term)) +
|
ggplot(coef_df, aes(x = estimate, y = term)) +
|
||||||
geom_point(size = 1) +
|
geom_point(size = 1) +
|
||||||
geom_errorbar(aes(xmin = estimate - 1.96*std.error, xmax = estimate + 1.96 *std.error), height = 0.2) +
|
geom_errorbar(aes(xmin = estimate - 1.96*std.error, xmax = estimate + 1.96 *std.error), height = 0.2) +
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user