library(tidyverse)

# Input data ----

main_csv <- "~/analysis_data/120725_unified.csv"
main_df <- read.csv(main_csv, header = TRUE)

# Task descriptions flagged as author-closed ----

# read.csv() type-converts a column that holds only True/False (plus
# blanks/NA) to logical. Comparing a logical against the string "True" never
# matches (TRUE is coerced to "TRUE"), so compare the character form against
# both spellings; this works whether the column arrived as character or
# logical. NA values are excluded, as they were with `==`.
author_closer <- main_df |>
  filter(
    comment_type == "task_description",
    as.character(author_closer) %in% c("True", "TRUE")
  )

# Frequency of each isAuthorWMF value among those rows (printed at top level).
table(author_closer$isAuthorWMF)

# New authors per first-task week ----

# Step 1: one row per (source, AuthorPHID) holding the smallest week_index
#         among that author's task descriptions in that source.
# Step 2: count how many authors share each (first_task, source) pair.
# `.groups = "drop"` returns plain ungrouped results so no grouping leaks into
# later code and dplyr does not emit its regrouping message.
new_authors_summary <- main_df |>
  filter(comment_type == "task_description") |>
  group_by(source, AuthorPHID) |>
  summarise(
    first_task = min(week_index),
    .groups = "drop"
  ) |>
  group_by(first_task, source) |>
  summarise(
    new_authors_count = n(),
    .groups = "drop"
  )

# Plot ----

# Display names for the `source` codes used as facet strip labels.
source_labels <- c(
  "c1" = "VisualEditor",
  "c2" = "HTTPS-login",
  "c3" = "HTTP-deprecation"
)

# One panel per source, each with its own y scale. The bars show the number of
# authors whose first task falls in each week_index value, so the y axis and
# title are labelled as author counts rather than task counts.
ggplot(new_authors_summary, aes(x = first_task, y = new_authors_count)) +
  facet_grid(
    source ~ .,
    scales = "free_y",
    labeller = labeller(source = source_labels)
  ) +
  geom_col() +
  labs(
    x = "Date of first task",
    y = "Number of new authors",
    title = "New authors by date of first task"
  ) +
  theme_minimal()