updated with new outcome variable

2025-10-03 12:01:37 -07:00 · 2025-10-03 12:01:37 -07:00 · 83bcc15811
commit 83bcc15811
parent 5f157ef532
4 changed files with 377640 additions and 0 deletions
--- a/analysis_data/100325_unified_phab.csv
+++ b/analysis_data/100325_unified_phab.csv
--- a/analysis_data/outcome_variable_check.R
+++ b/analysis_data/outcome_variable_check.R
@ -0,0 +1,26 @@
+library(tidyverse)
+
+# Derive the logical `resolution_outcome` column for task-description rows
+# and write the updated unified dataset to disk.
+# NOTE(review): this commit also adds 092925_unified_phab.csv under
+# stale_unifieds/ -- confirm the input path below still resolves.
+main_csv <- "~/analysis_data/092925_unified_phab.csv"
+main_df <- read.csv(main_csv, header = TRUE)
+
+# Per-source cutoffs. Each value is 23:59:59 UTC on the date shown when read
+# as Unix epoch seconds; presumably `date_closed` uses the same unit
+# (TODO confirm).
+date1 <- 1380499199 # 2013-09-29
+date2 <- 1385510399 # 2013-11-26
+date3 <- 1443657599 # 2015-09-30
+cutoff_by_source <- c(c1 = date1, c2 = date2, c3 = date3)
+
+new_outcome <- main_df |>
+  mutate(
+    resolution_outcome = case_when(
+      # Task descriptions without a close date are recorded as FALSE.
+      comment_type == "task_description" & is.na(date_closed) ~ FALSE,
+      # TRUE when closed strictly before the source's cutoff, FALSE otherwise.
+      # A source with no cutoff looks up NA, so the comparison stays NA.
+      comment_type == "task_description" ~
+        date_closed < unname(cutoff_by_source[as.character(source)]),
+      # Every other row (including NA comment_type) gets no outcome.
+      TRUE ~ NA
+    )
+  ) |>
+  select(-closed_relevance)
+
+# Sanity check: every task_description row should have an outcome. A bare
+# `anyNA()` call is only printed in interactive/Rscript runs and is silently
+# dropped under `source()`, so the result is reported explicitly instead.
+task_outcomes <- new_outcome$resolution_outcome[
+  new_outcome$comment_type == "task_description"
+]
+if (anyNA(task_outcomes)) {
+  warning(
+    sum(is.na(task_outcomes)),
+    " task_description row(s) have no resolution_outcome",
+    call. = FALSE
+  )
+}
+
+# Written relative to the current working directory.
+write.csv(new_outcome, "100325_unified_phab.csv", row.names = FALSE)
--- a/analysis_data/roster_confirmation.R
+++ b/analysis_data/roster_confirmation.R
@ -0,0 +1,5 @@
+library(tidyverse)
+library(jsonlite)
+## TODO: reload roster and map onto the existing data
+# Read the roster CSV into a data frame; the first row holds column names.
+roster_csv <- "~/analysis_data/100225_phabricator_api_roster_results.csv"
+roster_df <- read.csv(file = roster_csv, header = TRUE)
--- a/analysis_data/stale_unifieds/092925_unified_phab.csv
+++ b/analysis_data/stale_unifieds/092925_unified_phab.csv