updating for new bivariate plots
This commit is contained in:
parent
2efd961fed
commit
6f2858dd72
@ -15,7 +15,7 @@ library(ggdist)
|
||||
|
||||
ggplot(main_df, aes(x = week_index, y = n_comments, fill = isAuthorWMF)) +
|
||||
facet_grid(~source) +
|
||||
geom_dots(side = "both", layout = "hex", stackratio = 0.92) +
|
||||
geom_dots(side= "both", layout = "hex", stackratio = 0.92) +
|
||||
scale_fill_viridis_d() +
|
||||
xlim(-130, 15) +
|
||||
theme_minimal() +
|
||||
|
||||
@ -7,6 +7,19 @@ library(purrr)
|
||||
unified_csv <-"~/analysis_data/102725_unified.csv"
|
||||
unified_df <- read.csv(unified_csv, header = TRUE)
|
||||
|
||||
# Flag rows whose OLMo sentence labels mention "ACTION ON ISSUE" or
# "TASK PROGRESS" (case-insensitive). Missing labels count as no update.
# The flag is stored as numeric 1/0 so it can serve as a binomial outcome.
unified_df <- unified_df |>
  mutate(
    has_update = as.numeric(
      !is.na(olmo_sentence_labels) &
        grepl(
          "(ACTION ON ISSUE|TASK PROGRESS)",
          as.character(olmo_sentence_labels),
          ignore.case = TRUE
        )
    )
  )
|
||||
|
||||
# Logistic regression of the update flag on the four principal-component
# scores and the modal-verb measure, followed by its coefficient summary.
model <- glm(
  has_update ~ PC1 + PC2 + PC3 + PC4 + modal_verbs,
  family = binomial(link = "logit"),
  data = unified_df
)
summary(model)
|
||||
|
||||
|
||||
|
||||
unified_df |>
|
||||
ggplot(
|
||||
|
||||
56
dsl/dsl.R
Normal file
56
dsl/dsl.R
Normal file
@ -0,0 +1,56 @@
|
||||
library(tidyverse)
|
||||
library(dsl)
|
||||
|
||||
# Location of the DSL analysis table; the CSV is read with its header row.
dsl_csv <- "~/dsl/102725_DSL_df_adac.csv"
dsl_df <- read.csv(dsl_csv, header = TRUE)
|
||||
|
||||
|
||||
# Power analysis for a logit model of dsl_score across several candidate
# numbers of labeled observations. human_SOL_prop_adac is the variable that
# is paired with the automated prediction olmo_SOL_prop_adac, and
# sampling_prob names the column passed as the sampling probability.
power_model <- power_dsl(
  labeled_size = c(100, 200, 300, 600, 1000),
  model = "logit",
  formula = dsl_score ~ human_SOL_prop_adac +
    median_gerrit_loc_delta + median_gerrit_reviewers +
    as.factor(isAuthorWMF) +
    as.factor(source) +
    median_PC3_adac +
    week_index,
  predicted_var = "human_SOL_prop_adac",
  prediction = "olmo_SOL_prop_adac",
  sample_prob = "sampling_prob",
  data = dsl_df
)

# Print the power results, then plot them for the coefficient of interest.
summary(power_model)
plot(power_model, coef_name = "human_SOL_prop_adac")
|
||||
|
||||
# Restrict the data to rows from source "c1" for the models that follow.
dsl_df <- filter(dsl_df, source == "c1")
|
||||
|
||||
# DSL logit model of dsl_score on the human-coded BI proportion, with
# Gerrit activity, author affiliation, author-closer status, median PC4 and
# week index as covariates. olmo_BI_prop_adac supplies the automated
# prediction for human_BI_prop_adac.
trial_model <- dsl(
  model = "logit",
  formula = dsl_score ~ human_BI_prop_adac +
    median_gerrit_loc_delta + median_gerrit_reviewers +
    as.factor(isAuthorWMF) +
    as.factor(author_closer) +
    median_PC4_adac +
    week_index,
  predicted_var = "human_BI_prop_adac",
  prediction = "olmo_BI_prop_adac",
  sample_prob = "sampling_prob",
  data = dsl_df
)
summary(trial_model)
|
||||
|
||||
# DSL linear model in which the predicted variable is the outcome itself:
# human_BE_prop is regressed on the median PC scores, author affiliation,
# author-closer status and week index, with olmo_BE_prop as its automated
# prediction.
style_model <- dsl(
  model = "lm",
  formula = human_BE_prop ~
    median_PC1 + median_PC4 +
    as.factor(isAuthorWMF) +
    as.factor(author_closer) +
    median_PC3 +
    week_index,
  predicted_var = "human_BE_prop",
  prediction = "olmo_BE_prop",
  sample_prob = "sampling_prob",
  data = dsl_df
)
summary(style_model)
|
||||
45
dsl/final_bivariate.R
Normal file
45
dsl/final_bivariate.R
Normal file
@ -0,0 +1,45 @@
|
||||
library(tidyverse)
|
||||
#library(dsl)
|
||||
library(dplyr)
|
||||
# Location of the DSL analysis table; the CSV is read with its header row.
dsl_csv <- "~/dsl/102725_DSL_df_adac.csv"
dsl_df <- read.csv(dsl_csv, header = TRUE)
|
||||
|
||||
# Summarise resolution outcomes and time-to-resolution for every
# source x author-affiliation cell.
outcome_summary <- dsl_df |>
  group_by(source, isAuthorWMF) |>
  summarise(
    # Denominator: rows with a recorded (non-missing) outcome.
    total_sum = sum(!is.na(resolution_outcome)),
    # na.rm = TRUE keeps the numerator consistent with the non-missing
    # denominator; without it a single NA in a group turns both the count
    # and the proportion into NA.
    count_resolution_outcome = sum(resolution_outcome, na.rm = TRUE),
    success_prop = count_resolution_outcome / total_sum,
    # TTR / 24: TTR appears to be in hours (days here) -- TODO confirm units.
    median_ttr_days = median(TTR, na.rm = TRUE) / 24,
    # Return an ungrouped table so later verbs are not silently grouped
    # by `source`.
    .groups = "drop"
  )
|
||||
|
||||
|
||||
library(ggplot2)
|
||||
library(ggdist)
|
||||
|
||||
|
||||
# Sign-preserving power transform: raises the magnitude of `x` to the power
# `p` and restores the original sign, so negative inputs stay negative.
#
# x: numeric vector to transform.
# p: exponent applied to abs(x).
# Returns a numeric vector the same length as `x`.
signed_power <- function(x, p) {
  magnitude <- abs(x)^p
  sign(x) * magnitude
}
|
||||
|
||||
# Sign-preserving log transform: log1p of the magnitude of `x`, carrying
# the sign of `x`; zero maps to zero.
#
# x: numeric vector to transform.
# Returns a numeric vector the same length as `x`.
signed_log <- function(x) {
  log_magnitude <- log1p(abs(x))
  sign(x) * log_magnitude
}
|
||||
# Add sign-preserving transforms of the median PC3/PC4 scores:
# sp_* columns use signed_power() with exponent 0.2, sl_* columns use
# signed_log().
dsl_df <- mutate(
  dsl_df,
  sp_med_pc3_adac = signed_power(median_PC3_adac, 0.2),
  sp_med_pc4_adac = signed_power(median_PC4_adac, 0.2),
  sl_med_pc4_adac = signed_log(median_PC4_adac),
  sl_med_pc3_adac = signed_log(median_PC3_adac)
)
|
||||
|
||||
|
||||
# Scatter of log1p(TTR / 24) against the signed-log median PC4 score, one
# panel per source, with points and loess trends coloured and shaped by
# author affiliation. The smoother is added before the points, so the
# points are drawn on top of it.
ggplot(
  dsl_df,
  aes(
    y = log1p(TTR / 24),
    x = sl_med_pc4_adac,
    shape = isAuthorWMF,
    color = isAuthorWMF
  )
) +
  geom_smooth(method = "loess", span = 0.5) +
  geom_point() +
  facet_grid(~source) +
  scale_color_viridis_d() +
  theme_minimal()
|
||||
@ -3,8 +3,6 @@ library(tidyverse)
|
||||
dsl_csv <-"~/dsl/102725_DSL_df_adac.csv"
|
||||
dsl_df <- read.csv(dsl_csv, header = TRUE)
|
||||
#https://stats.oarc.ucla.edu/wp-content/uploads/2025/02/survival_r_full.html
|
||||
dsl_df <- dsl_df |>
|
||||
filter(source == "c1")
|
||||
|
||||
library(survival)
|
||||
library(broom)
|
||||
@ -12,8 +10,8 @@ dsl_df$ttr_weeks <- dsl_df$TTR / 168
|
||||
trial.survival <- Surv(dsl_df$ttr_weeks)
|
||||
trial.model <- coxph(trial.survival ~ isAuthorWMF +
|
||||
median_PC3_adac + week_index +
|
||||
median_gerrit_loc_delta + median_gerrit_reviewers +
|
||||
olmo_BI_prop_adac, data=dsl_df)
|
||||
median_gerrit_loc_delta + median_gerrit_reviewers + source +
|
||||
phase + author_closer, data=dsl_df)
|
||||
summary(trial.model)
|
||||
# Tidy the fitted model into a table of exponentiated coefficients with
# confidence intervals. TRUE is spelled out because `T` is an ordinary
# variable that can be reassigned, which would silently change these flags.
trial.tab <- tidy(trial.model, exponentiate = TRUE, conf.int = TRUE)
|
||||
|
||||
@ -26,3 +24,5 @@ ggplot(trial.tab,
|
||||
|
||||
surv.at.means <- survfit(trial.model)
|
||||
plot(surv.at.means, xlab="weeks", ylab="survival probability")
|
||||
#https://hbiostat.org/stat/binarysurv
|
||||
plot(surv.at.means, xlab = "weeks", ylab = "survival probability", xlim = c(0, 26))
|
||||
|
||||
17
mgaughan-rstudio-server_30651103.out
Normal file
17
mgaughan-rstudio-server_30651103.out
Normal file
@ -0,0 +1,17 @@
|
||||
1. SSH tunnel from your workstation using the following command:
|
||||
|
||||
ssh -N -L 8787:n3439:51687 mjilg@klone.hyak.uw.edu
|
||||
|
||||
and point your web browser to http://localhost:8787
|
||||
|
||||
2. log in to RStudio Server using the following credentials:
|
||||
|
||||
user: mjilg
|
||||
password: JZoMumQwFbIc7EUcaGbK
|
||||
|
||||
When done using RStudio Server, terminate the job by:
|
||||
|
||||
1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window)
|
||||
2. Issue the following command on the login node:
|
||||
|
||||
scancel -f 30651103
|
||||
Loading…
Reference in New Issue
Block a user