Update with new analysis and new information for the write-up
This commit is contained in:
parent
a0545ad8de
commit
86ee932c67
8
analysis_data/120125_data_verification.R
Normal file
8
analysis_data/120125_data_verification.R
Normal file
@ -0,0 +1,8 @@
|
||||
library(tidyverse)
|
||||
|
||||
main_csv <- "~/analysis_data/102125_constituent_dfs/110525_olmo_batched_categorized.csv"
|
||||
main_df <- read.csv(main_csv, header = TRUE)
|
||||
|
||||
c3_df <- main_df |>
|
||||
filter(source=="c3")
|
||||
|
||||
@ -10,10 +10,10 @@ library(purrr)
|
||||
# get the categorical variables encoded as integers, then wrapped as factors
|
||||
# figure out power at 200, 400, 500, 750, and 1000
|
||||
#joining sentences with their
|
||||
labeled_csv <-"~/p2/quest/092325_biberplus_complete_labels.csv"
|
||||
labeled_csv <-"~/analysis_data/100625_constituent_dfs/092325_biberplus_complete_labels.csv"
|
||||
labeled_df <- read.csv(labeled_csv, header = TRUE)
|
||||
|
||||
main_csv <- "~/analysis_data/constituent_dfs/071425_master_discussion_data.csv"
|
||||
main_csv <- "~/analysis_data/100625_constituent_dfs/071425_master_discussion_data.csv"
|
||||
main_df <- read.csv(main_csv, header = TRUE)
|
||||
|
||||
dupes_labeled <- labeled_df %>% count(date_created, id, comment_text, AuthorPHID, TaskPHID) %>% filter(n > 1)
|
||||
|
||||
|
Can't render this file because it is too large.
|
@ -1,9 +1,55 @@
|
||||
library(tidyverse)
|
||||
#library(dsl)
|
||||
library(dplyr)
|
||||
dsl_csv <-"~/dsl/110925_DSL_df_adac.csv"
|
||||
dsl_csv <-"~/dsl/111725_DSL_frame.csv"
|
||||
dsl_df <- read.csv(dsl_csv, header = TRUE)
|
||||
|
||||
weekly_summary <- dsl_df |>
|
||||
group_by(week_index, source, isAuthorWMF)|>
|
||||
summarise(
|
||||
tasks_made = sum(!is.na(resolution_outcome)),
|
||||
count_resolution_outcome = sum(dsl_score),
|
||||
author_closer_sum = sum(author_closer == TRUE),
|
||||
median_olmo_EP_prop_adac = median(olmo_EP_prop_adac),
|
||||
median_olmo_TSOL_prop_adac = median(olmo_TSOL_prop_adac),
|
||||
median_comments_before_resolution = median(n_comments_before)
|
||||
)
|
||||
|
||||
ggplot(
|
||||
weekly_summary,
|
||||
aes(
|
||||
x=week_index,
|
||||
y=tasks_made,
|
||||
fill=isAuthorWMF
|
||||
)
|
||||
) +
|
||||
facet_grid(source ~ ., scales = "free_y") +
|
||||
geom_col(position = position_dodge(width = 0.9), width = 0.8) +
|
||||
geom_vline(data = weekly_summary |> filter(source == "c1"),
|
||||
aes(xintercept = -29),
|
||||
linetype = "dotted", color = "black", linewidth = 0.5) +
|
||||
geom_vline(data = weekly_summary |> filter(source == "c1"),
|
||||
aes(xintercept = -9),
|
||||
linetype = "dotted", color = "black", linewidth = 0.5) +
|
||||
geom_vline(data = weekly_summary |> filter(source == "c1"),
|
||||
aes(xintercept = -4),
|
||||
linetype = "3313", color = "black", linewidth = 0.5) +
|
||||
geom_vline(data = weekly_summary |> filter(source == "c2"),
|
||||
aes(xintercept = -99),
|
||||
linetype = "dotted", color = "black", linewidth = 0.5) +
|
||||
geom_vline(data = weekly_summary |> filter(source == "c2"),
|
||||
aes(xintercept = -4),
|
||||
linetype = "3313", color = "black", linewidth = 0.5) +
|
||||
geom_vline(data = weekly_summary |> filter(source == "c3"),
|
||||
aes(xintercept = -97),
|
||||
linetype = "dotted", color = "black", linewidth = 0.5) +
|
||||
geom_vline(data = weekly_summary |> filter(source == "c3"),
|
||||
aes(xintercept = -3),
|
||||
linetype = "3313", color = "black", linewidth = 0.5) +
|
||||
geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.5) +
|
||||
theme_minimal() +
|
||||
scale_fill_viridis_d()
|
||||
|
||||
|
||||
outcome_summary <- dsl_df |>
|
||||
group_by(source, isAuthorWMF)|>
|
||||
@ -23,13 +69,15 @@ library(ggdist)
|
||||
ggplot(
|
||||
dsl_df,
|
||||
aes(
|
||||
x=n_comments_before,
|
||||
color=source,
|
||||
fill=source
|
||||
x=week_index,
|
||||
y=olmo_EP_prop_adac,
|
||||
color=isAuthorWMF
|
||||
)
|
||||
) +
|
||||
facet_grid(~isAuthorWMF) +
|
||||
stat_halfeye() +
|
||||
facet_grid(source ~ .) +
|
||||
geom_point() +
|
||||
geom_smooth() +
|
||||
scale_color_viridis_d() +
|
||||
theme_minimal()
|
||||
|
||||
dsl_df <- dsl_df |>
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
library(tidyverse)
|
||||
|
||||
dsl_csv <-"~/dsl/102725_DSL_df_adac.csv"
|
||||
dsl_csv <-"~/dsl/111725_DSL_frame.csv"
|
||||
dsl_df <- read.csv(dsl_csv, header = TRUE)
|
||||
#https://stats.oarc.ucla.edu/wp-content/uploads/2025/02/survival_r_full.html
|
||||
|
||||
@ -8,10 +8,8 @@ library(survival)
|
||||
library(broom)
|
||||
dsl_df$ttr_weeks <- dsl_df$TTR / 168
|
||||
trial.survival <- Surv(dsl_df$ttr_weeks)
|
||||
trial.model <- coxph(trial.survival ~ isAuthorWMF +
|
||||
median_PC3_adac + week_index +
|
||||
median_gerrit_loc_delta + median_gerrit_reviewers + source +
|
||||
phase + author_closer, data=dsl_df)
|
||||
trial.model <- coxph(trial.survival ~ n_comments_before
|
||||
+ week_index + as.factor(isAuthorWMF) * as.factor(source), data=dsl_df)
|
||||
summary(trial.model)
|
||||
# Tidy the fitted Cox model into a coefficient table. exponentiate = TRUE
# reports exponentiated coefficients (hazard ratios) and conf.int = TRUE adds
# their confidence intervals. Spell out TRUE: `T` is an ordinary variable
# that can be reassigned, which would silently change these arguments.
trial.tab <- tidy(trial.model, exponentiate = TRUE, conf.int = TRUE)
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user