adding some more metadata to the DSL aggregation files
This commit is contained in:
parent
be587982d7
commit
7555259a3e
File diff suppressed because one or more lines are too long
@ -40,6 +40,9 @@ main_df <- main_df |>
|
|||||||
!is.na(task_desc_author) &
|
!is.na(task_desc_author) &
|
||||||
AuthorPHID == task_desc_author &
|
AuthorPHID == task_desc_author &
|
||||||
(is.na(task_desc_dateClosed) | created < task_desc_dateClosed)
|
(is.na(task_desc_dateClosed) | created < task_desc_dateClosed)
|
||||||
|
),
|
||||||
|
before_close = as.integer(
|
||||||
|
(is.na(task_desc_dateClosed) | created < task_desc_dateClosed)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
# add dictionary values
|
# add dictionary values
|
||||||
|
|||||||
3130
dsl/110925_DSL_df_adac.csv
Normal file
3130
dsl/110925_DSL_df_adac.csv
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,6 @@
|
|||||||
library(tidyverse)
|
library(tidyverse)
|
||||||
|
|
||||||
unified_csv <-"~/analysis_data/102725_unified.csv"
|
unified_csv <-"~/analysis_data/110925_unified.csv"
|
||||||
unified_df <- read.csv(unified_csv, header = TRUE)
|
unified_df <- read.csv(unified_csv, header = TRUE)
|
||||||
|
|
||||||
# 1. aggregate to the task level
|
# 1. aggregate to the task level
|
||||||
@ -223,12 +223,14 @@ task_level_variables <- unified_df |>
|
|||||||
median_PC4 = median(PC4),
|
median_PC4 = median(PC4),
|
||||||
median_PC4_adac = median(PC4[ADAC==1]),
|
median_PC4_adac = median(PC4[ADAC==1]),
|
||||||
median_PC4_no_adac = median(PC4[ADAC==0]),
|
median_PC4_no_adac = median(PC4[ADAC==0]),
|
||||||
|
n_comments = sum(!is.na(id)),
|
||||||
|
n_comments_before = sum(before_close)
|
||||||
)
|
)
|
||||||
|
|
||||||
descriptions <- unified_df |>
|
descriptions <- unified_df |>
|
||||||
filter(comment_type == "task_description")|>
|
filter(comment_type == "task_description")|>
|
||||||
select(TaskPHID, task_title, date_created, date_closed, isAuthorWMF,
|
select(TaskPHID, task_title, date_created, date_closed, isAuthorWMF,
|
||||||
source, phase, week_index, author_closer, resolution_outcome )
|
source, phase, week_index, author_closer, resolution_outcome, priority )
|
||||||
|
|
||||||
task_level_variables <- task_level_variables |>
|
task_level_variables <- task_level_variables |>
|
||||||
left_join(
|
left_join(
|
||||||
@ -286,21 +288,6 @@ ggplot(task_level_variables, aes(
|
|||||||
fill = "Resolution Outcome"
|
fill = "Resolution Outcome"
|
||||||
)
|
)
|
||||||
|
|
||||||
ggplot(task_level_variables, aes(
|
|
||||||
x = median_PC3_ADAC,
|
|
||||||
y = TTR,
|
|
||||||
fill = isAuthorWMF
|
|
||||||
)) +
|
|
||||||
facet_grid(~source, scales="fixed") +
|
|
||||||
geom_point(shape = 21, alpha=0.3, size=2) +
|
|
||||||
xlim(-20, 20) +
|
|
||||||
ylim(0, 1440) +
|
|
||||||
scale_fill_viridis_d() +
|
|
||||||
theme_minimal() +
|
|
||||||
labs(
|
|
||||||
title = "Median PC3 Value in ADAC Comments",
|
|
||||||
x = "Median PC3 Value",
|
|
||||||
y = "Time to Resolution (up to 60 days)",
|
|
||||||
)
|
|
||||||
# 4. save
|
# 4. save
|
||||||
write.csv(task_level_variables, "102725_DSL_df_adac.csv", row.names = FALSE)
|
write.csv(task_level_variables, "110925_DSL_df_adac.csv", row.names = FALSE)
|
||||||
|
|||||||
@ -1,16 +1,18 @@
|
|||||||
library(tidyverse)
|
library(tidyverse)
|
||||||
#library(dsl)
|
#library(dsl)
|
||||||
library(dplyr)
|
library(dplyr)
|
||||||
dsl_csv <-"~/dsl/102725_DSL_df_adac.csv"
|
dsl_csv <-"~/dsl/110925_DSL_df_adac.csv"
|
||||||
dsl_df <- read.csv(dsl_csv, header = TRUE)
|
dsl_df <- read.csv(dsl_csv, header = TRUE)
|
||||||
|
|
||||||
|
|
||||||
outcome_summary <- dsl_df |>
|
outcome_summary <- dsl_df |>
|
||||||
group_by(source, isAuthorWMF)|>
|
group_by(source, isAuthorWMF)|>
|
||||||
summarise(
|
summarise(
|
||||||
total_sum = sum(!is.na(resolution_outcome)),
|
total_sum = sum(!is.na(resolution_outcome)),
|
||||||
count_resolution_outcome = sum(resolution_outcome),
|
count_resolution_outcome = sum(resolution_outcome),
|
||||||
success_prop = count_resolution_outcome / total_sum,
|
success_prop = count_resolution_outcome / total_sum,
|
||||||
median_ttr_days = median(TTR, na.rm = TRUE) / 24
|
median_ttr_days = median(TTR, na.rm = TRUE) / 24,
|
||||||
|
median_comments_before_resolution = median(n_comments_before)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -18,6 +20,32 @@ library(ggplot2)
|
|||||||
library(ggdist)
|
library(ggdist)
|
||||||
|
|
||||||
|
|
||||||
|
ggplot(
|
||||||
|
dsl_df,
|
||||||
|
aes(
|
||||||
|
x=n_comments_before,
|
||||||
|
color=source,
|
||||||
|
fill=source
|
||||||
|
)
|
||||||
|
) +
|
||||||
|
facet_grid(~isAuthorWMF) +
|
||||||
|
stat_halfeye() +
|
||||||
|
theme_minimal()
|
||||||
|
|
||||||
|
dsl_df <- dsl_df |>
|
||||||
|
mutate(priority = factor(priority,
|
||||||
|
levels = c("Unbreak Now!", "High", "Medium", "Low", "Lowest", "Needs Triage")))
|
||||||
|
|
||||||
|
ggplot(dsl_df,
|
||||||
|
aes(
|
||||||
|
fill=resolution_outcome,
|
||||||
|
x=priority
|
||||||
|
)) +
|
||||||
|
facet_grid(~source) +
|
||||||
|
geom_bar() +
|
||||||
|
theme_minimal()
|
||||||
|
|
||||||
|
|
||||||
signed_power <- function(x, p) {
|
signed_power <- function(x, p) {
|
||||||
sign(x) * abs(x) ^ p
|
sign(x) * abs(x) ^ p
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user