adding some more metadata to the DSL aggregation files
This commit is contained in:
parent
be587982d7
commit
7555259a3e
File diff suppressed because one or more lines are too long
@ -40,6 +40,9 @@ main_df <- main_df |>
|
||||
!is.na(task_desc_author) &
|
||||
AuthorPHID == task_desc_author &
|
||||
(is.na(task_desc_dateClosed) | created < task_desc_dateClosed)
|
||||
),
|
||||
before_close = as.integer(
|
||||
(is.na(task_desc_dateClosed) | created < task_desc_dateClosed)
|
||||
)
|
||||
)
|
||||
# add dictionary values
|
||||
|
||||
3130
dsl/110925_DSL_df_adac.csv
Normal file
3130
dsl/110925_DSL_df_adac.csv
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,6 @@
|
||||
library(tidyverse)
|
||||
|
||||
unified_csv <-"~/analysis_data/102725_unified.csv"
|
||||
unified_csv <-"~/analysis_data/110925_unified.csv"
|
||||
unified_df <- read.csv(unified_csv, header = TRUE)
|
||||
|
||||
# 1. aggregate to the task level
|
||||
@ -223,12 +223,14 @@ task_level_variables <- unified_df |>
|
||||
median_PC4 = median(PC4),
|
||||
median_PC4_adac = median(PC4[ADAC==1]),
|
||||
median_PC4_no_adac = median(PC4[ADAC==0]),
|
||||
n_comments = sum(!is.na(id)),
|
||||
n_comments_before = sum(before_close)
|
||||
)
|
||||
|
||||
descriptions <- unified_df |>
|
||||
filter(comment_type == "task_description")|>
|
||||
select(TaskPHID, task_title, date_created, date_closed, isAuthorWMF,
|
||||
source, phase, week_index, author_closer, resolution_outcome )
|
||||
source, phase, week_index, author_closer, resolution_outcome, priority )
|
||||
|
||||
task_level_variables <- task_level_variables |>
|
||||
left_join(
|
||||
@ -286,21 +288,6 @@ ggplot(task_level_variables, aes(
|
||||
fill = "Resolution Outcome"
|
||||
)
|
||||
|
||||
ggplot(task_level_variables, aes(
|
||||
x = median_PC3_ADAC,
|
||||
y = TTR,
|
||||
fill = isAuthorWMF
|
||||
)) +
|
||||
facet_grid(~source, scales="fixed") +
|
||||
geom_point(shape = 21, alpha=0.3, size=2) +
|
||||
xlim(-20, 20) +
|
||||
ylim(0, 1440) +
|
||||
scale_fill_viridis_d() +
|
||||
theme_minimal() +
|
||||
labs(
|
||||
title = "Median PC3 Value in ADAC Comments",
|
||||
x = "Median PC3 Value",
|
||||
y = "Time to Resolution (up to 60 days)",
|
||||
)
|
||||
|
||||
# 4. save
|
||||
write.csv(task_level_variables, "102725_DSL_df_adac.csv", row.names = FALSE)
|
||||
write.csv(task_level_variables, "110925_DSL_df_adac.csv", row.names = FALSE)
|
||||
|
||||
@ -1,16 +1,18 @@
|
||||
library(tidyverse)
|
||||
#library(dsl)
|
||||
library(dplyr)
|
||||
dsl_csv <-"~/dsl/102725_DSL_df_adac.csv"
|
||||
dsl_csv <-"~/dsl/110925_DSL_df_adac.csv"
|
||||
dsl_df <- read.csv(dsl_csv, header = TRUE)
|
||||
|
||||
|
||||
outcome_summary <- dsl_df |>
|
||||
group_by(source, isAuthorWMF)|>
|
||||
summarise(
|
||||
total_sum = sum(!is.na(resolution_outcome)),
|
||||
count_resolution_outcome = sum(resolution_outcome),
|
||||
success_prop = count_resolution_outcome / total_sum,
|
||||
median_ttr_days = median(TTR, na.rm = TRUE) / 24
|
||||
median_ttr_days = median(TTR, na.rm = TRUE) / 24,
|
||||
median_comments_before_resolution = median(n_comments_before)
|
||||
)
|
||||
|
||||
|
||||
@ -18,6 +20,32 @@ library(ggplot2)
|
||||
library(ggdist)
|
||||
|
||||
|
||||
# Half-eye (density + interval) plot of n_comments_before for every row of
# dsl_df. Outline colour and fill are both mapped to `source`, and the panel
# is split into one facet column per value of isAuthorWMF.
ggplot(dsl_df, aes(x = n_comments_before, color = source, fill = source)) +
  facet_grid(~isAuthorWMF) +
  stat_halfeye() +
  theme_minimal()
|
||||
|
||||
# Convert `priority` to a factor with an explicit level order so that plots
# and tables list priorities in this order instead of alphabetically.
# Any value not listed in `levels` becomes NA (standard factor() behaviour).
dsl_df <- mutate(
  dsl_df,
  priority = factor(
    priority,
    levels = c(
      "Unbreak Now!", "High", "Medium", "Low", "Lowest", "Needs Triage"
    )
  )
)
|
||||
|
||||
# Bar chart counting rows of dsl_df per `priority` level, with one facet
# column per `source` and the bar fill mapped to resolution_outcome.
# NOTE(review): resolution_outcome is passed to sum() elsewhere in this
# script; if read.csv() loads it as numeric, ggplot2 will treat the fill as
# continuous and the bars will not be split by outcome. Wrapping it in
# factor() may be what is intended -- confirm against the data.
ggplot(dsl_df, aes(x = priority, fill = resolution_outcome)) +
  facet_grid(~source) +
  geom_bar() +
  theme_minimal()
|
||||
|
||||
|
||||
# Sign-preserving power transform.
#
# Raises the magnitude of each element of `x` to the power `p` and then
# restores the element's original sign, so negative inputs stay negative
# (e.g. signed_power(-4, 0.5) is -2 rather than NaN).
#
# Arguments:
#   x  numeric vector (vectorized; usual R recycling applies against `p`).
#   p  numeric exponent.
#
# Returns: numeric vector, sign(x) * abs(x)^p.
#
# Edge case: for x == 0 with a negative `p` the result is NaN, because
# abs(0)^p is Inf and sign(0) is 0 (0 * Inf). NA inputs propagate as NA.
signed_power <- function(x, p) {
  sign(x) * abs(x)^p
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user