# Review notes for this patch (a flattened unified diff touching three R plotting scripts).
# - 121325_work/work-migration.R is renamed to ve-commit-comparison.R; the weekly commit
#   counts are now built from joint_df instead of joint_df_filtered, and the facet labeller
#   is keyed on `repo` ("ve", "core") rather than `source`.
# - The fill legend is given explicit breaks/labels, with "Nonaffiliate" replaced by
#   "External Contributor" in main_plot_script.R and p2/quest/neurobiber_PCA_analysis.R.
# - Every ggsave() filename uses the 011926_ prefix so that script output matches the PNGs
#   this same patch adds under final_plots/ (the added lines previously said 011925_).
diff --git a/121325_work/work-migration.R b/121325_work/ve-commit-comparison.R similarity index 74% rename from 121325_work/work-migration.R rename to 121325_work/ve-commit-comparison.R index f967415..275b921 100644 --- a/121325_work/work-migration.R +++ b/121325_work/ve-commit-comparison.R @@ -24,7 +24,7 @@ known_affil_emails <- c("krinkle@fastmail.com", "roan.kattouw@gmail.com", "trevorparscal@gmail.com", "krinklemail@gmail.com", "moriel@gmail.com") active_names<- c("Timo Tijhof", "Krinkle", "Roan Kattouw", "Catrope", - "Trevor Parscal", "Ed Sanders") + "Trevor Parscal", "Ed Sanders", "Moriel Schottlender", "Gabriel Wicke", "C. Scott Ananian") #jforrester@wikimedia.org #(author_name %in% active_names) ~ "ActiveEmails", joint_df <- joint_df |> @@ -48,14 +48,14 @@ authors_in_both <- joint_df |> joint_df_filtered <- joint_df |> filter(author_email %in% authors_in_both) c1_event_date <- as.Date("2013-07-01") -c1_core_weekly <- joint_df_filtered |> +c1_weekly <- joint_df |> mutate(week_index = relative_week(commit_date, c1_event_date)) |> group_by(week_index, isAuthorWMF, repo)|> summarise(count = n(), .groups = 'drop')|> filter(week_index >= -33 & week_index <= 13) |> mutate(source = 'c1') -counts <- c1_core_weekly |> +counts <- c1_weekly |> filter(isAuthorWMF != "Gerrit")|> filter(isAuthorWMF != "localization")|> filter(source == 'c1') @@ -72,10 +72,8 @@ commits_created <- ) ) + facet_grid(repo ~ ., - scales = "free_y", - labeller = labeller(source = c("c1" = "VisualEditor", - "c2" = "HTTPS-login", - "c3" = "HTTP-deprecation"))) + + labeller = labeller(repo = c("ve" = "extensions/visualeditor", + "core" = "mediawiki/core"))) + geom_col(position = position_dodge(width = 0.9), width = 0.8) + geom_vline(data = counts |> filter(source == "c1"), aes(xintercept = -29), @@ -88,24 +86,33 @@ commits_created <- linetype = "3313", color = "black", linewidth = 0.5) + geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.5) + geom_text( - data = 
subset(counts, source == "c1" & week_index == 3), - aes(x=week_index, y=100, label='Opt-out deployment'), - size = 2.5) + + data = subset(counts, repo == "core" & week_index == 4), + aes(x=week_index, y=75, label='Opt-out deployment'), + size = 3) + geom_text( - data = subset(counts, source == "c1" & week_index == -27), - aes(x=week_index, y=100, label='Opt-in Testing'), - size = 2.5) + + data = subset(counts, repo == "core" & week_index == -27), + aes(x=week_index, y=75, label='Opt-in Testing'), + size = 3) + geom_text( - data = subset(counts, source == "c1" & week_index == -4), - aes(x=week_index, y=100, label='Deployment Announcement'), - size = 2.5) + + data = subset(counts, repo == "core" & week_index == -8), + aes(x=week_index, y=75, label='Deployment Announcement'), + size = 3) + theme_minimal() + - scale_fill_viridis_d() + + scale_fill_viridis_d( + breaks = c("FALSE", "TRUE", "BzImport"), + labels = c("External Contributor", "WMF-affiliate", "BzImport") + ) + labs( - x = "Weeks from Feature Deployment", - y = "Weekly count of new Commits", - fill = "Commit Author Affiliated with WMF?" 
+ x = "Weeks from VisualEditor Deployment on Wikipedia", + y = "Count of New Commits", + fill = "Commit Author" ) + theme(legend.position = "top") commits_created - +ggsave( + filename = "011926_ve_commits_created.png", + plot = commits_created, + width = 8, # inches + height = 4, # inches + dpi = 800 # high resolution +) diff --git a/doc_plots/011125_dsl_coefs.png b/final_plots/011126_dsl_coefs.png similarity index 100% rename from doc_plots/011125_dsl_coefs.png rename to final_plots/011126_dsl_coefs.png diff --git a/final_plots/011926_adac_affil_style.png b/final_plots/011926_adac_affil_style.png new file mode 100644 index 0000000..115d3f0 Binary files /dev/null and b/final_plots/011926_adac_affil_style.png differ diff --git a/final_plots/011926_machine_label_comparison.png b/final_plots/011926_machine_label_comparison.png new file mode 100644 index 0000000..01f6ec8 Binary files /dev/null and b/final_plots/011926_machine_label_comparison.png differ diff --git a/final_plots/011926_tasks_created.png b/final_plots/011926_tasks_created.png new file mode 100644 index 0000000..158e61a Binary files /dev/null and b/final_plots/011926_tasks_created.png differ diff --git a/final_plots/011926_ttr_boxplot.png b/final_plots/011926_ttr_boxplot.png new file mode 100644 index 0000000..c39f44d Binary files /dev/null and b/final_plots/011926_ttr_boxplot.png differ diff --git a/final_plots/011926_ve_commits_created.png b/final_plots/011926_ve_commits_created.png new file mode 100644 index 0000000..f8835bc Binary files /dev/null and b/final_plots/011926_ve_commits_created.png differ diff --git a/main_plot_script.R b/main_plot_script.R index 8b54f86..ea91dd3 100644 --- a/main_plot_script.R +++ b/main_plot_script.R @@ -68,7 +68,7 @@ tasks_created <- ggplot( theme_minimal() + scale_fill_viridis_d( breaks = c("FALSE", "TRUE", "BzImport"), - labels = c("Nonaffiliate", "WMF-affiliate", "BzImport") + labels = c("External Contributor", "WMF-affiliate", "BzImport") ) + labs( x = "Weeks from 
Feature Deployment", @@ -78,7 +78,7 @@ tasks_created <- ggplot( theme(legend.position = "top") tasks_created ggsave( - filename = "011025_tasks_created.png", + filename = "011926_tasks_created.png", plot = tasks_created, width = 8, # inches height = 4, # inches @@ -148,7 +148,7 @@ ttr_boxplot <- dsl_df |> filter(priority == "Needs Triage" | priority == "Unbreak Now!" | priority == "High"), source == "c1" & week_index == 12), - aes(x=week_index, y=80, label='Opt-in Testing'), + aes(x=week_index, y=80, label='Opt-out Deployment'), color = "black", size = 3) + geom_vline(xintercept =14, linetype = "dashed", color = "black", linewidth = 0.5) + @@ -159,17 +159,15 @@ ttr_boxplot <- dsl_df |> theme(legend.position = "top") ttr_boxplot ggsave( - filename = "011025_ttr_boxplot.png", + filename = "011926_ttr_boxplot.png", plot = ttr_boxplot, width = 8, # inches height = 4, # inches dpi = 800 # high resolution ) #4.3 plot comparing machine labels of information type -dsl_df <- dsl_df |> - filter(isAuthorWMF != "BzImport") - dsl_df_long <- dsl_df %>% + filter(isAuthorWMF != "BzImport")|> pivot_longer( cols = c(olmo_EP_prop_adac, olmo_RK_prop_adac, olmo_TSOL_prop_adac), names_to = "tag", @@ -197,7 +195,10 @@ olmo_comparison <- ggplot( "c3" = "HTTP-deprecation"))) + geom_boxplot() + theme_minimal() + - scale_fill_viridis_d() + + scale_fill_viridis_d( + breaks = c("FALSE", "TRUE", "BzImport"), + labels = c("External Contributor", "WMF-affiliate", "BzImport") + ) + labs( x = "Issue Information Type Category", y = "% of sentences machine-labeled", @@ -207,7 +208,7 @@ olmo_comparison <- ggplot( theme(legend.position = "top") olmo_comparison ggsave( - filename = "011025_machine_label_comparison.png", + filename = "011926_machine_label_comparison.png", plot = olmo_comparison, width = 8, # inches height = 4, # inches diff --git a/p2/quest/neurobiber_PCA_analysis.R b/p2/quest/neurobiber_PCA_analysis.R index 3e5ea92..e9fd0a4 100644 --- a/p2/quest/neurobiber_PCA_analysis.R +++ 
b/p2/quest/neurobiber_PCA_analysis.R @@ -111,7 +111,7 @@ adac_style <- main_df |> ylim(-50, 50) + scale_fill_viridis_d( breaks = c("FALSE", "TRUE", "BzImport"), - labels = c("Nonaffiliate", "WMF-affiliate", "BzImport") + labels = c("External Contributor", "WMF-affiliate", "BzImport") ) + theme_minimal() + theme(legend.position = "top") + @@ -123,7 +123,7 @@ adac_style <- main_df |> adac_style #"PCs for Pre-Resolution Comments Written by Task Author (by Author Affiliation, Case, and Comment Type)" ggsave( - filename = "011125_adac_affil_style.png", + filename = "011926_adac_affil_style.png", plot = adac_style, width = 10, # inches height = 5, # inches