
updating with some plots for new results, results updated with new work

Matthew Gaughan 2025-12-16 21:26:11 -08:00
parent 1584e2cd5f
commit 32fb4ca67c
9 changed files with 63 additions and 33 deletions


@@ -1,5 +1,5 @@
library(tidyverse)
main_csv <-"~/analysis_data/120725_unified.csv"
main_csv <-"~/analysis_data/121625_unified.csv"
main_df <- read.csv(main_csv, header = TRUE)
bz_summary <- main_df |>
@@ -11,23 +11,6 @@ bz_summary <- main_df |>
dsl_csv <-"~/dsl/120725_DSL_frame.csv"
dsl_df <- read.csv(dsl_csv, header = TRUE)
needs_triage <- dsl_df |>
filter(week_index >= -4) |>
filter(priority=="Needs Triage") |>

Binary file not shown (new image, 359 KiB)

Binary file not shown (new image, 453 KiB)

Binary file not shown (new image, 475 KiB)


@@ -24,6 +24,7 @@ dev_model <- dsl(
summary(dev_model)
#saveRDS(dev_model, "121625_logit_dsl.RDS")
dev_model <- readRDS("dsl/121625_logit_dsl.RDS")
summary(dev_model)
library(broom)
library(dplyr)
tidy.dsl <- function(x, conf.int = FALSE, conf.level = 0.95, exponentiate = FALSE, ...) {


@@ -1,7 +1,7 @@
library(tidyverse)
#library(dsl)
library(dplyr)
dsl_csv <-"~/dsl/120725_DSL_frame.csv"
dsl_csv <-"~/dsl/121625_DSL_frame.csv"
dsl_df <- read.csv(dsl_csv, header = TRUE)
dsl_df <- dsl_df |>
@@ -74,7 +74,7 @@ c1_ttr_plot <- dsl_df |>
ggplot(
aes(
x=as.factor(week_index),
y= TTR/168,
y= TTR_hours/168,
fill=priority
)
) +
@@ -97,11 +97,11 @@ c1_ttr_plot <- dsl_df |>
theme(legend.position = "top")
c1_ttr_plot
ggsave(
filename = "120825_c1_ttr.png",
filename = "121625_c1_ttr.png",
plot = c1_ttr_plot,
width = 12, # inches
width = 8, # inches
height = 6, # inches
dpi = 600 # high resolution
dpi = 800 # high resolution
)
dsl_df_long <- dsl_df %>%
@@ -148,7 +148,8 @@ weekly_summary <- dsl_df |>
median_olmo_TSOL_prop_adac = median(olmo_TSOL_prop_adac),
median_olmo_RK_prop_adac = median(olmo_RK_prop_adac),
median_comments_before_resolution = median(n_comments_before)
)
) |>
mutate(isAuthorWMF = factor(isAuthorWMF, levels = c("FALSE", "BzImport", "TRUE")))
tasks_created <- ggplot(
weekly_summary,
@@ -199,20 +200,23 @@ tasks_created <- ggplot(
aes(x=week_index, y=20, label='Deployment Announcement'),
size = 2.5) +
theme_minimal() +
scale_fill_viridis_d() +
scale_fill_viridis_d(
breaks = c("FALSE", "TRUE", "BzImport"),
labels = c("Nonaffiliate", "WMF-affiliate", "BzImport")
) +
labs(
x = "Weeks from Feature Deployment",
y = "Count of Tasks Created",
fill = "Task Author Affiliated with WMF?"
fill = "Task Author"
) +
theme(legend.position = "top")
tasks_created
ggsave(
filename = "120825_tasks_created.png",
filename = "121625_tasks_created.png",
plot = tasks_created,
width = 12, # inches
height = 6, # inches
dpi = 600 # high resolution
dpi = 800 # high resolution
)
outcome_summary <- dsl_df |>


@@ -1,7 +1,7 @@
library(tidyverse)
#library(dsl)
library(dplyr)
dsl_csv <-"~/dsl/126725_DSL_frame.csv"
dsl_csv <-"~/dsl/121625_DSL_frame.csv"
dsl_df <- read.csv(dsl_csv, header = TRUE)
dsl_df <- dsl_df |>
@@ -73,14 +73,14 @@ dsl_df |>
# % of tasks declined per week
declined_summary <- dsl_df %>%
group_by(week_index, task_status, source) %>%
group_by(week_index, status, source) %>%
summarise(count = n(), .groups = 'drop') |>
group_by(week_index, source) %>%
mutate(proportion = count / sum(count)) %>%
ungroup()
task_status_plot <- declined_summary|>
ggplot(aes(x = week_index, y = proportion, fill=task_status,)) +
ggplot(aes(x = week_index, y = proportion, fill=status,)) +
facet_grid(source ~ .,
scales = "free_y",
labeller = labeller(source = c("c1" = "VisualEditor",
@@ -98,12 +98,13 @@ task_status_plot <- declined_summary|>
size = 2.5) +
theme_minimal() +
theme(legend.position = "top")
task_status_plot
ggsave(
filename = "120825_tasks_status.png",
filename = "121625_tasks_status.png",
plot = task_status_plot,
width = 12, # inches
height = 6, # inches
dpi = 600 # high resolution
dpi = 800 # high resolution
)

dsl/scratch.R (new file, 24 additions)

@@ -0,0 +1,24 @@
library(tidyverse)
dsl_csv <-"~/dsl/121625_DSL_frame.csv"
dsl_df <- read.csv(dsl_csv, header = TRUE)
# mean and sd of time to resolution (in weeks) for untriaged VisualEditor (c1) tasks,
# split by whether the task author is WMF-affiliated
dsl_df |>
filter(week_index >= 4)|>
filter(source == "c1") |>
filter(priority == "Needs Triage")|>
mutate(ttr_weeks = TTR_hours / 168) |> # 168 hours = 1 week
group_by(isAuthorWMF) |>
summarise(
mean_ttr = mean(ttr_weeks, na.rm = TRUE),
sd_ttr = sd(ttr_weeks, na.rm = TRUE)
)
# task counts by source, author WMF affiliation, and priority
triage <- dsl_df |>
group_by(source, isAuthorWMF, priority)|>
summarise(
count = n()
)


@@ -0,0 +1,17 @@
1. SSH tunnel from your workstation using the following command:
ssh -N -L 8787:n3441:34063 mjilg@klone.hyak.uw.edu
and point your web browser to http://localhost:8787
2. Log in to RStudio Server using the following credentials:
user: mjilg
password: mGALb8Ae1rRJIi4jmNUf
When done using RStudio Server, terminate the job by:
1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window)
2. Issue the following command on the login node:
scancel -f 31867859
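
Putting the steps above together, a minimal sketch of a full session from a local terminal looks like the following (the compute node, port, and job ID are the job-specific values shown above and will differ for each new RStudio job):
# forward local port 8787 to the RStudio Server listening on compute node n3441, port 34063
# (-N: no remote command, -L: local port forward)
ssh -N -L 8787:n3441:34063 mjilg@klone.hyak.uw.edu
# browse to http://localhost:8787 and sign in with the credentials above;
# when finished, from the klone login node, cancel the batch job hosting the server
scancel -f 31867859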