
updating with some plots for new results, results updated with new work

Matthew Gaughan 2025-12-16 21:26:11 -08:00
parent 1584e2cd5f
commit 32fb4ca67c
9 changed files with 63 additions and 33 deletions


@@ -1,5 +1,5 @@
library(tidyverse)
main_csv <-"~/analysis_data/120725_unified.csv"
main_csv <-"~/analysis_data/121625_unified.csv"
main_df <- read.csv(main_csv, header = TRUE)
bz_summary <- main_df |>
@@ -11,23 +11,6 @@ bz_summary <- main_df |>
dsl_csv <-"~/dsl/120725_DSL_frame.csv"
dsl_df <- read.csv(dsl_csv, header = TRUE)
needs_triage <- dsl_df |>
filter(week_index >= -4) |>
filter(priority=="Needs Triage") |>

Binary file not shown (new image, 359 KiB)

Binary file not shown (new image, 453 KiB)

Binary file not shown (new image, 475 KiB)


@@ -24,6 +24,7 @@ dev_model <- dsl(
summary(dev_model)
#saveRDS(dev_model, "121625_logit_dsl.RDS")
dev_model <- readRDS("dsl/121625_logit_dsl.RDS")
summary(dev_model)
library(broom)
library(dplyr)
tidy.dsl <- function(x, conf.int = FALSE, conf.level = 0.95, exponentiate = FALSE, ...) {


@@ -1,7 +1,7 @@
library(tidyverse)
#library(dsl)
library(dplyr)
dsl_csv <-"~/dsl/120725_DSL_frame.csv"
dsl_csv <-"~/dsl/121625_DSL_frame.csv"
dsl_df <- read.csv(dsl_csv, header = TRUE)
dsl_df <- dsl_df |>
@@ -74,7 +74,7 @@ c1_ttr_plot <- dsl_df |>
ggplot(
aes(
x=as.factor(week_index),
y= TTR/168,
y= TTR_hours/168,
fill=priority
)
) +
@@ -97,11 +97,11 @@ c1_ttr_plot <- dsl_df |>
theme(legend.position = "top")
c1_ttr_plot
ggsave(
filename = "120825_c1_ttr.png",
filename = "121625_c1_ttr.png",
plot = c1_ttr_plot,
width = 12, # inches
width = 8, # inches
height = 6, # inches
dpi = 600 # high resolution
dpi = 800 # high resolution
)
dsl_df_long <- dsl_df %>%
@@ -148,7 +148,8 @@ weekly_summary <- dsl_df |>
median_olmo_TSOL_prop_adac = median(olmo_TSOL_prop_adac),
median_olmo_RK_prop_adac = median(olmo_RK_prop_adac),
median_comments_before_resolution = median(n_comments_before)
)
) |>
mutate(isAuthorWMF = factor(isAuthorWMF, levels = c("FALSE", "BzImport", "TRUE")))
tasks_created <- ggplot(
weekly_summary,
@@ -199,20 +200,23 @@ tasks_created <- ggplot(
aes(x=week_index, y=20, label='Deployment Announcement'),
size = 2.5) +
theme_minimal() +
scale_fill_viridis_d() +
scale_fill_viridis_d(
breaks = c("FALSE", "TRUE", "BzImport"),
labels = c("Nonaffiliate", "WMF-affiliate", "BzImport")
) +
labs(
x = "Weeks from Feature Deployment",
y = "Count of Tasks Created",
fill = "Task Author Affiliated with WMF?"
fill = "Task Author"
) +
theme(legend.position = "top")
tasks_created
ggsave(
filename = "120825_tasks_created.png",
filename = "121625_tasks_created.png",
plot = tasks_created,
width = 12, # inches
height = 6, # inches
dpi = 600 # high resolution
dpi = 800 # high resolution
)
outcome_summary <- dsl_df |>


@@ -1,7 +1,7 @@
library(tidyverse)
#library(dsl)
library(dplyr)
dsl_csv <-"~/dsl/126725_DSL_frame.csv"
dsl_csv <-"~/dsl/121625_DSL_frame.csv"
dsl_df <- read.csv(dsl_csv, header = TRUE)
dsl_df <- dsl_df |>
@@ -73,14 +73,14 @@ dsl_df |>
# % of tasks declined per week
declined_summary <- dsl_df %>%
group_by(week_index, task_status, source) %>%
group_by(week_index, status, source) %>%
summarise(count = n(), .groups = 'drop') |>
group_by(week_index, source) %>%
mutate(proportion = count / sum(count)) %>%
ungroup()
task_status_plot <- declined_summary|>
ggplot(aes(x = week_index, y = proportion, fill=task_status,)) +
ggplot(aes(x = week_index, y = proportion, fill=status,)) +
facet_grid(source ~ .,
scales = "free_y",
labeller = labeller(source = c("c1" = "VisualEditor",
@@ -98,12 +98,13 @@ task_status_plot <- declined_summary|>
size = 2.5) +
theme_minimal() +
theme(legend.position = "top")
task_status_plot
ggsave(
filename = "120825_tasks_status.png",
filename = "121625_tasks_status.png",
plot = task_status_plot,
width = 12, # inches
height = 6, # inches
dpi = 600 # high resolution
dpi = 800 # high resolution
)

dsl/scratch.R (new file, 24 additions)

@@ -0,0 +1,24 @@
library(tidyverse)
dsl_csv <-"~/dsl/121625_DSL_frame.csv"
dsl_df <- read.csv(dsl_csv, header = TRUE)
# mean and sd of time to resolution (in weeks) for untriaged VisualEditor (c1) tasks,
# split by whether the task author is WMF-affiliated
dsl_df |>
filter(week_index >= 4)|>
filter(source == "c1") |>
filter(priority == "Needs Triage")|>
mutate(ttr_weeks = TTR_hours / 168) |> # 168 hours = 1 week
group_by(isAuthorWMF) |>
summarise(
mean_ttr = mean(ttr_weeks, na.rm = TRUE),
sd_ttr = sd(ttr_weeks, na.rm = TRUE)
)
# task counts by source, author WMF affiliation, and priority
triage <- dsl_df |>
group_by(source, isAuthorWMF, priority)|>
summarise(
count = n()
)


@@ -0,0 +1,17 @@
1. SSH tunnel from your workstation using the following command:
ssh -N -L 8787:n3441:34063 mjilg@klone.hyak.uw.edu
and point your web browser to http://localhost:8787
2. Log in to RStudio Server using the following credentials:
user: mjilg
password: mGALb8Ae1rRJIi4jmNUf
When done using RStudio Server, terminate the job by:
1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window)
2. Issue the following command on the login node:
scancel -f 31867859
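
Putting the steps above together, a minimal sketch of a full session from a local terminal looks like the following (the compute node, port, and job ID are the job-specific values shown above and will differ for each new RStudio job):
# forward local port 8787 to the RStudio Server listening on compute node n3441, port 34063
# (-N: no remote command, -L: local port forward)
ssh -N -L 8787:n3441:34063 mjilg@klone.hyak.uw.edu
# browse to http://localhost:8787 and sign in with the credentials above;
# when finished, from the klone login node, cancel the batch job hosting the server
scancel -f 31867859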