1
0

adding small updates to results scripts

This commit is contained in:
Matthew Gaughan 2025-11-26 13:10:20 -08:00
parent f37dac73f4
commit a0545ad8de
2 changed files with 15 additions and 19 deletions

View File

@@ -68,10 +68,10 @@ summary(felm_model)
dev_model <- dsl( dev_model <- dsl(
model = "logit", model = "logit",
formula = task_resolution ~ as.factor(isAuthorWMF) * as.factor(source) * human_EP_prop_adac + human_TSOL_prop_adac + human_RK_prop_adac formula = task_resolution ~ human_EP_prop_adac + human_TSOL_prop_adac + human_RK_prop_adac
+ median_PC4_adac + median_PC3_adac + n_comments_before + median_PC4_adac + median_PC3_adac + n_comments_before
+ median_gerrit_reviewers + median_gerrit_loc_delta + median_gerrit_reviewers + median_gerrit_loc_delta
+ week_index, + week_index + as.factor(isAuthorWMF) * as.factor(source),
predicted_var = c("human_EP_prop_adac", "human_TSOL_prop_adac", "human_RK_prop_adac"), predicted_var = c("human_EP_prop_adac", "human_TSOL_prop_adac", "human_RK_prop_adac"),
prediction = c("olmo_EP_prop_adac", "olmo_TSOL_prop_adac", "olmo_RK_prop_adac"), prediction = c("olmo_EP_prop_adac", "olmo_TSOL_prop_adac", "olmo_RK_prop_adac"),
sample_prob = "sampling_prob", sample_prob = "sampling_prob",
@@ -105,7 +105,7 @@ ggplot(coef_df, aes(x = estimate, y = term)) +
geom_point(size = 1) + geom_point(size = 1) +
geom_errorbar(aes(xmin = estimate - 1.96*std.error, xmax = estimate + 1.96 *std.error), height = 0.2) + geom_errorbar(aes(xmin = estimate - 1.96*std.error, xmax = estimate + 1.96 *std.error), height = 0.2) +
geom_vline(xintercept = 0, linetype = "dashed", color = "red") + geom_vline(xintercept = 0, linetype = "dashed", color = "red") +
labs(title = "Fixed Effects Model Coefficients", labs(title = "DSL Logit Model Coefficients",
x = "Coefficient Estimate", x = "Coefficient Estimate",
y = "Variable") + y = "Variable") +
theme_minimal() theme_minimal()

View File

@@ -9,12 +9,12 @@ library(dplyr)
#pca_csv <- "~/p2/quest/102025_total_pca_df.csv" #pca_csv <- "~/p2/quest/102025_total_pca_df.csv"
#pca_df <- read.csv(pca_csv , header = TRUE) |> mutate(comment_text = text) #pca_df <- read.csv(pca_csv , header = TRUE) |> mutate(comment_text = text)
main_csv <- "~/analysis_data/102725_unified.csv" main_csv <- "~/analysis_data/110925_unified.csv"
main_df <- read.csv(main_csv , header = TRUE) main_df <- read.csv(main_csv , header = TRUE)
main_df <- main_df |> main_df <- main_df |>
mutate( mutate(
comment_wordcount = as.integer(str_count(replace_na(as.character(comment_text), ""), "\\S+")) comment_wordcount = as.integer(stringr::str_count(tidyr::replace_na(as.character(comment_text), ""), "\\S+"))
) )
@@ -25,24 +25,20 @@ description_df <- main_df |>
replies_df <- main_df |> replies_df <- main_df |>
filter(comment_type == "task_subcomment") |> filter(comment_type == "task_subcomment") |>
filter(isGerritBot != TRUE) |> filter(isGerritBot != TRUE)
left_join(
description_df,
by="TaskPHID"
)
library(ggplot2)
ggplot(replies_df, aes(x = autho, y = PC3, fill = comment_type)) + ggplot(replies_df, aes(x = PC3, y = PC4, fill = isAuthorWMF)) +
facet_grid(source~phase, scales="fixed") + facet_grid(ADAC~source, scales="fixed") +
geom_point(shape = 21, alpha=0.3, size=2) + geom_point(shape = 21, alpha=0.15, size=3) +
xlim(-30, 30) + xlim(-50, 50) +
ylim(-30, 30) + ylim(-50, 50) +
scale_fill_viridis_d() + scale_fill_viridis_d() +
theme_minimal() + theme_minimal() +
labs( labs(
title = "PCs for Task Comments (Faceted by source and phase)", title = "PCs for Task Comments (Faceted by source (column))",
x = "PC4", x = "PC3",
y = "PC3", y = "PC4",
) )