1
0

updating plots for ww

This commit is contained in:
Matthew Gaughan 2025-03-05 09:20:11 -08:00
parent 7f758723c0
commit 4f6e190d18
11 changed files with 185 additions and 259 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 102 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 103 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 133 KiB

View File

@ -1,22 +1,22 @@
# NOTE(review): this span looks like a rendered diff hunk -- removed and added
# versions of several statements sit next to each other without +/- markers,
# so it may not parse as one R script. Confirm against the checked-out file.
library(tidyverse)
# Path to the weekly commit-count CSV. Two assignments are shown; if both were
# run, the second (en-testing) value would be the one used by read.csv() below.
count_data_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/event_0217_extensions_ve_weekly_commit_count_data.csv"
count_data_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/en-testing_0217_extensions_ve_weekly_commit_count_data.csv"
input_df <- read.csv(count_data_fp, header = TRUE)
# Non-bot commits = all commits minus bot commits.
input_df$nonbot_commit_count <- input_df$commit_count - input_df$bot_commit_count
# window_num is the half-width of the relative_week window kept by filter().
window_num <- 52
input_df <- input_df |>
filter(relative_week >= (- window_num) & relative_week <= (window_num)) |>
window_num <- 12
intermediate_df <- input_df |>
mutate(nonbot_commit_count = commit_count - bot_commit_count)|>
# Commits left over after subtracting the mediawiki_dev, wikia and wikimedia
# counts from the non-bot total.
mutate(other_commit_count = nonbot_commit_count - mediawiki_dev_commit_count - wikia_commit_count - wikimedia_commit_count) |>
# Fold the mediawiki_dev and wikia counts into wikimedia_commit_count ...
mutate(wikimedia_commit_count = wikimedia_commit_count + mediawiki_dev_commit_count + wikia_commit_count) |>
# ... then drop the two columns that were folded in.
select(-mediawiki_dev_commit_count) |>
select(-wikia_commit_count)
dplyr::select(-mediawiki_dev_commit_count) |>
dplyr::select(-wikia_commit_count) |>
# Keep rows with relative_week in [-window_num, window_num].
filter(relative_week >= (- window_num) & relative_week <= (window_num))
library(scales)
library(ggplot2)
# Smoothed trend of nonbot_commit_count against relative_week. The expression
# continues past the end of this hunk.
time_plot <- input_df |>
time_plot <- intermediate_df |>
ggplot(aes(x=relative_week, y=nonbot_commit_count)) +
labs(x="Weekly Offset", y="Nonbot Commit Count") +
geom_smooth() +
@ -27,7 +27,7 @@ time_plot
library(dplyr)
# Per-row share of non-bot commits for the wikimedia and other groups.
# NOTE(review): two left-hand sides are shown (input_df vs intermediate_df);
# presumably one is the removed diff line -- confirm which source is current.
share_df <- input_df |>
share_df <- intermediate_df |>
mutate(wikimedia_share = wikimedia_commit_count / nonbot_commit_count) |>
mutate(other_share = other_commit_count / nonbot_commit_count)|>
# Drop every row that contains a missing value in any column.
drop_na()
@ -36,18 +36,15 @@ share_long <- share_df |>
# Tail of the share_long pipeline: keep the week and the two share columns,
# then reshape to long form with one row per (relative_week, category) and the
# value in `share`.
dplyr::select(relative_week, wikimedia_share, other_share) |>
pivot_longer(cols = c(wikimedia_share, other_share), names_to = "category", values_to = "share")
# Line chart of share by category over relative_week.
# NOTE(review): two plot definitions (share_plot and commit_share_plot) are
# interleaved below; this looks like old and new diff lines shown together.
share_plot <- share_long |>
ggplot(aes(x=relative_week, y=share, color=category)) +
commit_share_plot <- share_long |>
ggplot(aes(x=relative_week,
y=share,
color=category)) +
geom_line() +
# Vertical markers at relative weeks 0, 19 and 28, each with a date label
# placed at y = 1.
geom_vline(xintercept = 0)+
annotate("text", x = -7, y=1, label = "2012-12-12") +
geom_vline(xintercept = 19)+
annotate("text", x = 12, y=1, label = "2013-04-28") +
geom_vline(xintercept = 28)+
annotate("text", x = 35, y=1, label = "2013-07-01") +
labs(x = "Relative Week", y = "Share of Nonbot Commit Count", color = "Affiliation") +
ggtitle("VE Weekly Share of Nonbot Commit Count by Affiliation (enwiki opt-in testing 2012-12-12)") +
geom_point() +
labs(x = "Relative Week", y = "Share of Nonbot Commits", color="Commit Author Affiliation") +
# NOTE(review): an unnamed labels vector is applied positionally; presumably
# other_share maps to "Unaffiliated" and wikimedia_share to the second label
# -- confirm the level order.
scale_color_discrete(labels = c("Unaffiliated", "Organizationally Affiliated")) +
ggtitle("VisualEditor Nonbot Commit Share Around Opt-out Deployment") +
theme_bw() +
theme(legend.position = "top")
# Print the plot object (both the old and the new name are shown).
share_plot
commit_share_plot

View File

@ -10,7 +10,8 @@ event_df <- read.csv(event_fp, header = TRUE) |> mutate(rd_event = "default")
#input_df <- bind_rows(entest_df, widetest_df, event_df)
#dropping the event (2013-07-01) from the modeling
# NOTE(review): input_df is assigned twice below. If both lines run, the later
# one (event_df only) wins, which is the opposite of the "dropping the event"
# comment above -- confirm which assignment is current.
input_df <- bind_rows(entest_df, widetest_df)
#input_df <- bind_rows(entest_df, widetest_df)
input_df <- event_df
# Non-bot commits = all commits minus bot commits. The pipeline continues past
# the end of this hunk.
input_df <- input_df |>
mutate(nonbot_commit_count = commit_count - bot_commit_count)|>
@ -37,16 +38,13 @@ intermediate_long_df <- long_df |>
library(rdd)
# Drop every row of the long-format data that contains a missing value.
intermediate_long_df <- intermediate_long_df |>
drop_na()
# Variance, mean and median of the commit-count outcome. The trailing values
# are results recorded from an earlier run and may be stale for the current
# input.
var(intermediate_long_df$lengthened_commit_count) # 1253.343
mean(intermediate_long_df$lengthened_commit_count) # 44.92381
median(intermediate_long_df$lengthened_commit_count) # 39.5
get_optimal_bandwidth <- function(df){
bw <- tryCatch({
IKbandwidth(df$relative_week, df$lengthened_commit_count, cutpoint = 0, verbose = FALSE, kernel = "triangular")
IKbandwidth(df$relative_week, df$commit_share, cutpoint = 0, verbose = FALSE, kernel = "triangular")
}, error = function(e) {
NA
})
@ -63,15 +61,15 @@ library(fitdistrplus)
# Distribution diagnostics for the commit-count outcome, treated as continuous
# (discrete=FALSE).
descdist(final_long_df$lengthened_commit_count, discrete=FALSE)
#start_values <- list(shape1 = 1, shape2 = 1)
#fit <- MASS::fitdistr(as.numeric(long_df$lengthened_commit_count), "negative binomial")
# NOTE(review): `fit` is only created in the commented-out line above, so the
# uncommented print(fit) would fail unless `fit` exists from elsewhere.
# Presumably it is the removed diff line.
print(fit)
#print(fit)
#NOTE should not run if you've already dropped NA
# Negative-binomial mixed model of commit counts on the before_after x
# relative_week interaction, with the same terms varying by commit_type.
# NOTE(review): two versions of the random-effects lines are shown, one with an
# additional rd_event term and one without. They cannot both be present in
# valid code -- confirm which is current.
mlm <- glmer.nb(lengthened_commit_count ~ before_after*relative_week +
(before_after*relative_week|commit_type) +
(before_after*relative_week|rd_event),
(before_after*relative_week|commit_type),
# Optimizer settings: bobyqa with up to 2e5 function evaluations, and nAGQ=0.
control=glmerControl(optimizer="bobyqa",
optCtrl=list(maxfun=2e5)), nAGQ=0,
data=final_long_df)
#(before_after*relative_week|rd_event)
#mlm <- lmer(lengthened_commit_count ~ before_after*relative_week+
# (before_after*relative_week|commit_type) +
# (before_after*relative_week|rd_event) ,data=long_df)
@ -83,24 +81,33 @@ print(res)
#final_long_df <- final_long_df |>
# drop_na()
# NOTE(review): this span interleaves the earlier lmer-based share models with
# the later lm-based ones (old and new diff lines shown together). Confirm
# against the checked-out file before running.
library(performance)
#descdist(long_df$commit_share, discrete=FALSE)
#fit <- MASS::fitdistr(as.numeric(long_df$commit_share), "normal")
#print(fit)
library(texreg)
# Distribution diagnostics for commit_share, treated as continuous.
descdist(final_long_df$commit_share, discrete=FALSE)
# Subset to the wikimedia_commit_count rows of the long-format data.
wikimedia_long_df <- final_long_df |>
filter(commit_type == "wikimedia_commit_count")
# Earlier version: linear mixed model with a random intercept per rd_event.
wikimedia_share_lmer <- lmer(commit_share ~ before_after*relative_week +
(1| rd_event),
data=wikimedia_long_df)
summary(wikimedia_share_lmer)
icc(wikimedia_share_lmer)
filter(commit_type == "wikimedia_commit_count")|>
drop_na()
# Later version: ordinary linear model of commit_share on the before_after x
# relative_week interaction, followed by a residual Q-Q plot and a texreg()
# table.
wikimedia_share_lm <- lm(commit_share ~ before_after*relative_week,
data=wikimedia_long_df)
summary(wikimedia_share_lm)
qqnorm(residuals(wikimedia_share_lm))
texreg(wikimedia_share_lm)
# Same analysis for the other_commit_count rows.
other_long_df <- final_long_df |>
filter(commit_type == "other_commit_count")
other_share_lmer <- lmer(commit_share ~ before_after*relative_week +
(1| rd_event),
data=other_long_df)
summary(other_share_lmer)
icc(other_share_lmer)
filter(commit_type == "other_commit_count") |>
drop_na()
#other_share_lmer <- lmer(commit_share ~ before_after*relative_week +
# (1| rd_event),
# data=other_long_df)
other_share_lm <- lm(commit_share ~ before_after*relative_week,
data=other_long_df)
summary(other_share_lm)
qqnorm(residuals(other_share_lm))
texreg(other_share_lm)
#power analysis
#library(simr)

View File

@ -11,6 +11,7 @@ event_df <- read.csv(event_fp, header = TRUE) |> mutate(rd_event = "default")
#input_df <- bind_rows(entest_df, widetest_df, event_df)
#dropping the event (2013-07-01) from the modeling
# Stack the entest_df and widetest_df data frames; event_df is left out here.
input_df <- bind_rows(entest_df, widetest_df)
#input_df <- event_df
# Non-bot commits = all commits minus bot commits. The pipeline continues past
# the end of this hunk.
input_df <- input_df |>
mutate(nonbot_commit_count = commit_count - bot_commit_count)|>
@ -30,42 +31,52 @@ intermediate_long_df <- long_df |>
# Tail of the intermediate_long_df pipeline: add log(1 + count) and count / 10
# versions of the commit-count outcome.
mutate(log_commits = log1p(lengthened_commit_count))|>
mutate(scaled_long_commits = lengthened_commit_count / 10)
# NOTE(review): this drop_na() and the one on final_long_df further down are
# presumably the old and new positions of the same step in the diff -- confirm.
intermediate_long_df <- intermediate_long_df |>
drop_na()
# Keep rows with relative_week in [-window_num, window_num].
window_num <- 4
final_long_df <- intermediate_long_df |>
filter(relative_week >= (- window_num) & relative_week <= (window_num))
# Named colour vector keyed by the two commit_type values, used by
# scale_color_manual().
affiliationColors <-
setNames( c('#5da2d8', '#c7756a')
,c("other_commit_count", "wikimedia_commit_count"))
# Commit counts over relative_week, coloured by commit_type, with line type by
# rd_event.
# NOTE(review): two `color=` mappings and two colour scales
# (scale_color_manual and scale_color_discrete) are shown; these look like old
# and new diff lines displayed together -- confirm which is current.
commit_plot <- final_long_df |>
ggplot(aes(x=relative_week,
y=lengthened_commit_count,
color=commit_type,
color=factor(commit_type),
linetype = rd_event)) +
geom_line() +
geom_point() +
labs(x = "Relative Week", y = "Nonbot Commits", linetype = "Testing Event", color="Commit Author Affiliation") +
scale_color_manual(values = affiliationColors,
labels = c("other_commit_count" = "Unaffiliated", "wikimedia_commit_count" = "WMF Affiliated")) +
scale_linetype_discrete(labels = c("enwiki testing (2012-12-12)", "wide testing (2013-04-25)")) +
scale_color_discrete(labels = c("Unaffiliated", "Organizationally Affiliated")) +
ggtitle("VisualEditor Nonbot Commit Count Around Opt-In Testing Events (by Affiliation)") +
theme_bw() +
theme(legend.position = "top")
commit_plot
# Total non-bot commits per week, with line type by rd_event.
# NOTE(review): filtering to a single commit_type presumably avoids plotting
# nonbot_commit_count once per commit_type row in the long data -- confirm.
total_commit_plot <- final_long_df |>
filter(commit_type == "other_commit_count")|>
ggplot(aes(x=relative_week,
y=nonbot_commit_count,
linetype = rd_event)) +
geom_line() +
geom_point() +
# Dotted black vertical line at relative_week == 0.
geom_vline(xintercept = 0, linetype = "dotted", color = "black") +
labs(x = "Relative Week", y = "Nonbot Commit Count", linetype = "Testing Event") +
scale_linetype_discrete(labels = c("enwiki testing (2012-12-12)", "wide testing (2013-04-25)")) +
# NOTE(review): two ggtitle() calls are shown; if both were applied, the later
# one would set the title.
ggtitle("VisualEditor Nonbot Commit Count Around Opt-In Testing Events") +
ggtitle("VisualEditor Weekly Nonbot Commits Surrounding Opt-In Testing Events") +
theme_bw() +
theme(legend.position = "top")
total_commit_plot
# Write the plot to a PNG in the working directory (width 15, height 9 in
# ggsave's default units, 800 dpi).
ggsave(filename = "0305-ve-total-commits.png", plot = total_commit_plot, width = 15, height = 9, dpi = 800)
# Drop every row containing a missing value before the plot that follows.
final_long_df <- final_long_df|>
drop_na()
commit_share_plot <- final_long_df |>
ggplot(aes(x=relative_week,
y=commit_share,
@ -74,9 +85,12 @@ commit_share_plot <- final_long_df |>
# Remaining layers of the commit_share_plot ggplot expression; its opening
# lines sit above the hunk boundary.
geom_line() +
geom_point() +
labs(x = "Relative Week", y = "Share of Nonbot Commits", linetype = "Testing Event", color="Commit Author Affiliation") +
# NOTE(review): both scale_color_manual and scale_color_discrete, and two
# ggtitle() calls, are shown; these look like old and new diff lines displayed
# together -- confirm which are current.
scale_color_manual(values = affiliationColors,
labels = c("other_commit_count" = "Unaffiliated", "wikimedia_commit_count" = "WMF Affiliated")) +
scale_linetype_discrete(labels = c("enwiki testing (2012-12-12)", "wide testing (2013-04-25)")) +
scale_color_discrete(labels = c("Unaffiliated", "Organizationally Affiliated")) +
ggtitle("VisualEditor Nonbot Commit Share Around Opt-In Testing Events") +
ggtitle("Share of Weekly VisualEditor Commits Surrounding Opt-in Testing Events") +
theme_bw() +
theme(legend.position = "top")
commit_share_plot
# Write the plot to a PNG in the working directory (width 12, height 9 in
# ggsave's default units, 800 dpi).
ggsave(filename = "0305-ve-testing-share.png", plot = commit_share_plot, width = 12, height = 9, dpi = 800)

View File

@ -0,0 +1,17 @@
1. Open an SSH tunnel from your workstation using the following command:
ssh -N -L 8787:n3439:56597 mjilg@klone.hyak.uw.edu
Then point your web browser to http://localhost:8787
2. Log in to RStudio Server using the following credentials:
user: mjilg
password: wO04FrVKQP5bSLRuzEi5
When done using RStudio Server, terminate the job by:
1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window)
2. Issue the following command on the login node:
scancel -f 24539959

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -72,7 +72,7 @@ grouped_results <- results |>
# Line chart of one column of grouped_results over week, coloured by affil.
# NOTE(review): two `y=` mappings (X5 and X1) are shown; presumably one is the
# removed diff line, since aes() cannot take `y` twice -- confirm which column
# is intended.
plot <- grouped_results |>
ggplot(aes(x=week,
y=X5,
y=X1,
color=affil)) +
geom_line()
# Print the plot object.
plot