updating plots for ww
This commit is contained in:
parent
7f758723c0
commit
4f6e190d18
BIN
commit_analysis/case1/030225_core_commit_count_ba.png
Normal file
BIN
commit_analysis/case1/030225_core_commit_count_ba.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 102 KiB |
BIN
commit_analysis/case1/030225_core_share_ba.png
Normal file
BIN
commit_analysis/case1/030225_core_share_ba.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 103 KiB |
BIN
commit_analysis/case1/0305-ve-total-commits.png
Normal file
BIN
commit_analysis/case1/0305-ve-total-commits.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 1.1 MiB |
BIN
commit_analysis/case1/030525_ve_commit_plot.png
Normal file
BIN
commit_analysis/case1/030525_ve_commit_plot.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 133 KiB |
@ -1,22 +1,22 @@
|
||||
library(tidyverse)
|
||||
count_data_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/event_0217_extensions_ve_weekly_commit_count_data.csv"
|
||||
count_data_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/en-testing_0217_extensions_ve_weekly_commit_count_data.csv"
|
||||
input_df <- read.csv(count_data_fp, header = TRUE)
|
||||
|
||||
input_df$nonbot_commit_count <- input_df$commit_count - input_df$bot_commit_count
|
||||
|
||||
window_num <- 52
|
||||
input_df <- input_df |>
|
||||
filter(relative_week >= (- window_num) & relative_week <= (window_num)) |>
|
||||
window_num <- 12
|
||||
intermediate_df <- input_df |>
|
||||
mutate(nonbot_commit_count = commit_count - bot_commit_count)|>
|
||||
mutate(other_commit_count = nonbot_commit_count - mediawiki_dev_commit_count - wikia_commit_count - wikimedia_commit_count) |>
|
||||
mutate(wikimedia_commit_count = wikimedia_commit_count + mediawiki_dev_commit_count + wikia_commit_count) |>
|
||||
select(-mediawiki_dev_commit_count) |>
|
||||
select(-wikia_commit_count)
|
||||
dplyr::select(-mediawiki_dev_commit_count) |>
|
||||
dplyr::select(-wikia_commit_count) |>
|
||||
filter(relative_week >= (- window_num) & relative_week <= (window_num))
|
||||
|
||||
library(scales)
|
||||
library(ggplot2)
|
||||
|
||||
time_plot <- input_df |>
|
||||
time_plot <- intermediate_df |>
|
||||
ggplot(aes(x=relative_week, y=nonbot_commit_count)) +
|
||||
labs(x="Weekly Offset", y="Nonbot Commit Count") +
|
||||
geom_smooth() +
|
||||
@ -27,7 +27,7 @@ time_plot
|
||||
|
||||
library(dplyr)
|
||||
|
||||
share_df <- input_df |>
|
||||
share_df <- intermediate_df |>
|
||||
mutate(wikimedia_share = wikimedia_commit_count / nonbot_commit_count) |>
|
||||
mutate(other_share = other_commit_count / nonbot_commit_count)|>
|
||||
drop_na()
|
||||
@ -36,18 +36,15 @@ share_long <- share_df |>
|
||||
dplyr::select(relative_week, wikimedia_share, other_share) |>
|
||||
pivot_longer(cols = c(wikimedia_share, other_share), names_to = "category", values_to = "share")
|
||||
|
||||
share_plot <- share_long |>
|
||||
ggplot(aes(x=relative_week, y=share, color=category)) +
|
||||
commit_share_plot <- share_long |>
|
||||
ggplot(aes(x=relative_week,
|
||||
y=share,
|
||||
color=category)) +
|
||||
geom_line() +
|
||||
geom_vline(xintercept = 0)+
|
||||
annotate("text", x = -7, y=1, label = "2012-12-12") +
|
||||
geom_vline(xintercept = 19)+
|
||||
annotate("text", x = 12, y=1, label = "2013-04-28") +
|
||||
geom_vline(xintercept = 28)+
|
||||
annotate("text", x = 35, y=1, label = "2013-07-01") +
|
||||
labs(x = "Relative Week", y = "Share of Nonbot Commit Count", color = "Affiliation") +
|
||||
ggtitle("VE Weekly Share of Nonbot Commit Count by Affiliation (enwiki opt-in testing 2012-12-12)") +
|
||||
geom_point() +
|
||||
labs(x = "Relative Week", y = "Share of Nonbot Commits", color="Commit Author Affiliation") +
|
||||
scale_color_discrete(labels = c("Unaffiliated", "Organizationally Affiliated")) +
|
||||
ggtitle("VisualEditor Nonbot Commit Share Around Opt-out Deployment") +
|
||||
theme_bw() +
|
||||
theme(legend.position = "top")
|
||||
share_plot
|
||||
|
||||
commit_share_plot
|
||||
|
@ -10,7 +10,8 @@ event_df <- read.csv(event_fp, header = TRUE) |> mutate(rd_event = "default")
|
||||
|
||||
#input_df <- bind_rows(entest_df, widetest_df, event_df)
|
||||
#dropping the event (2013-07-01) from the modeling
|
||||
input_df <- bind_rows(entest_df, widetest_df)
|
||||
#input_df <- bind_rows(entest_df, widetest_df)
|
||||
input_df <- event_df
|
||||
|
||||
input_df <- input_df |>
|
||||
mutate(nonbot_commit_count = commit_count - bot_commit_count)|>
|
||||
@ -37,16 +38,13 @@ intermediate_long_df <- long_df |>
|
||||
|
||||
library(rdd)
|
||||
|
||||
intermediate_long_df <- intermediate_long_df |>
|
||||
drop_na()
|
||||
|
||||
var(intermediate_long_df$lengthened_commit_count) # 1253.343
|
||||
mean(intermediate_long_df$lengthened_commit_count) # 44.92381
|
||||
median(intermediate_long_df$lengthened_commit_count) # 39.5
|
||||
|
||||
get_optimal_bandwidth <- function(df){
|
||||
bw <- tryCatch({
|
||||
IKbandwidth(df$relative_week, df$lengthened_commit_count, cutpoint = 0, verbose = FALSE, kernel = "triangular")
|
||||
IKbandwidth(df$relative_week, df$commit_share, cutpoint = 0, verbose = FALSE, kernel = "triangular")
|
||||
}, error = function(e) {
|
||||
NA
|
||||
})
|
||||
@ -63,15 +61,15 @@ library(fitdistrplus)
|
||||
descdist(final_long_df$lengthened_commit_count, discrete=FALSE)
|
||||
#start_values <- list(shape1 = 1, shape2 = 1)
|
||||
#fit <- MASS::fitdistr(as.numeric(long_df$lengthened_commit_count), "negative binomial")
|
||||
print(fit)
|
||||
#print(fit)
|
||||
|
||||
#NOTE should not run if you've already dropped NA
|
||||
mlm <- glmer.nb(lengthened_commit_count ~ before_after*relative_week +
|
||||
(before_after*relative_week|commit_type) +
|
||||
(before_after*relative_week|rd_event),
|
||||
(before_after*relative_week|commit_type),
|
||||
control=glmerControl(optimizer="bobyqa",
|
||||
optCtrl=list(maxfun=2e5)), nAGQ=0,
|
||||
data=final_long_df)
|
||||
#(before_after*relative_week|rd_event)
|
||||
#mlm <- lmer(lengthened_commit_count ~ before_after*relative_week+
|
||||
# (before_after*relative_week|commit_type) +
|
||||
# (before_after*relative_week|rd_event) ,data=long_df)
|
||||
@ -83,24 +81,33 @@ print(res)
|
||||
#final_long_df <- final_long_df |>
|
||||
# drop_na()
|
||||
library(performance)
|
||||
#descdist(long_df$commit_share, discrete=FALSE)
|
||||
#fit <- MASS::fitdistr(as.numeric(long_df$commit_share), "normal")
|
||||
#print(fit)
|
||||
library(texreg)
|
||||
|
||||
descdist(final_long_df$commit_share, discrete=FALSE)
|
||||
|
||||
wikimedia_long_df <- final_long_df |>
|
||||
filter(commit_type == "wikimedia_commit_count")
|
||||
wikimedia_share_lmer <- lmer(commit_share ~ before_after*relative_week +
|
||||
(1| rd_event),
|
||||
filter(commit_type == "wikimedia_commit_count")|>
|
||||
drop_na()
|
||||
|
||||
wikimedia_share_lm <- lm(commit_share ~ before_after*relative_week,
|
||||
data=wikimedia_long_df)
|
||||
summary(wikimedia_share_lmer)
|
||||
icc(wikimedia_share_lmer)
|
||||
summary(wikimedia_share_lm)
|
||||
qqnorm(residuals(wikimedia_share_lm))
|
||||
|
||||
texreg(wikimedia_share_lm)
|
||||
|
||||
other_long_df <- final_long_df |>
|
||||
filter(commit_type == "other_commit_count")
|
||||
other_share_lmer <- lmer(commit_share ~ before_after*relative_week +
|
||||
(1| rd_event),
|
||||
filter(commit_type == "other_commit_count") |>
|
||||
drop_na()
|
||||
#other_share_lmer <- lmer(commit_share ~ before_after*relative_week +
|
||||
# (1| rd_event),
|
||||
# data=other_long_df)
|
||||
other_share_lm <- lm(commit_share ~ before_after*relative_week,
|
||||
data=other_long_df)
|
||||
summary(other_share_lmer)
|
||||
icc(other_share_lmer)
|
||||
summary(other_share_lm)
|
||||
qqnorm(residuals(other_share_lm))
|
||||
|
||||
texreg(other_share_lm)
|
||||
|
||||
#power analysis
|
||||
#library(simr)
|
||||
|
@ -11,6 +11,7 @@ event_df <- read.csv(event_fp, header = TRUE) |> mutate(rd_event = "default")
|
||||
#input_df <- bind_rows(entest_df, widetest_df, event_df)
|
||||
#dropping the event (2013-07-01) from the modeling
|
||||
input_df <- bind_rows(entest_df, widetest_df)
|
||||
#input_df <- event_df
|
||||
|
||||
input_df <- input_df |>
|
||||
mutate(nonbot_commit_count = commit_count - bot_commit_count)|>
|
||||
@ -30,42 +31,52 @@ intermediate_long_df <- long_df |>
|
||||
mutate(log_commits = log1p(lengthened_commit_count))|>
|
||||
mutate(scaled_long_commits = lengthened_commit_count / 10)
|
||||
|
||||
intermediate_long_df <- intermediate_long_df |>
|
||||
drop_na()
|
||||
|
||||
window_num <- 4
|
||||
final_long_df <- intermediate_long_df |>
|
||||
filter(relative_week >= (- window_num) & relative_week <= (window_num))
|
||||
|
||||
affiliationColors <-
|
||||
setNames( c('#5da2d8', '#c7756a')
|
||||
,c("other_commit_count", "wikimedia_commit_count"))
|
||||
|
||||
|
||||
commit_plot <- final_long_df |>
|
||||
ggplot(aes(x=relative_week,
|
||||
y=lengthened_commit_count,
|
||||
color=commit_type,
|
||||
color=factor(commit_type),
|
||||
linetype = rd_event)) +
|
||||
geom_line() +
|
||||
geom_point() +
|
||||
labs(x = "Relative Week", y = "Nonbot Commits", linetype = "Testing Event", color="Commit Author Affiliation") +
|
||||
scale_color_manual(values = affiliationColors,
|
||||
labels = c("other_commit_count" = "Unaffiliated", "wikimedia_commit_count" = "WMF Affiliated")) +
|
||||
scale_linetype_discrete(labels = c("enwiki testing (2012-12-12)", "wide testing (2013-04-25)")) +
|
||||
scale_color_discrete(labels = c("Unaffiliated", "Organizationally Affiliated")) +
|
||||
ggtitle("VisualEditor Nonbot Commit Count Around Opt-In Testing Events (by Affiliation)") +
|
||||
theme_bw() +
|
||||
theme(legend.position = "top")
|
||||
commit_plot
|
||||
|
||||
total_commit_plot <- final_long_df |>
|
||||
filter(commit_type == "other_commit_count")|>
|
||||
ggplot(aes(x=relative_week,
|
||||
y=nonbot_commit_count,
|
||||
linetype = rd_event)) +
|
||||
geom_line() +
|
||||
geom_point() +
|
||||
geom_vline(xintercept = 0, linetype = "dotted", color = "black") +
|
||||
labs(x = "Relative Week", y = "Nonbot Commit Count", linetype = "Testing Event") +
|
||||
scale_linetype_discrete(labels = c("enwiki testing (2012-12-12)", "wide testing (2013-04-25)")) +
|
||||
ggtitle("VisualEditor Nonbot Commit Count Around Opt-In Testing Events") +
|
||||
ggtitle("VisualEditor Weekly Nonbot Commits Surrounding Opt-In Testing Events") +
|
||||
theme_bw() +
|
||||
theme(legend.position = "top")
|
||||
total_commit_plot
|
||||
|
||||
ggsave(filename = "0305-ve-total-commits.png", plot = total_commit_plot, width = 15, height = 9, dpi = 800)
|
||||
|
||||
|
||||
final_long_df <- final_long_df|>
|
||||
drop_na()
|
||||
|
||||
commit_share_plot <- final_long_df |>
|
||||
ggplot(aes(x=relative_week,
|
||||
y=commit_share,
|
||||
@ -74,9 +85,12 @@ commit_share_plot <- final_long_df |>
|
||||
geom_line() +
|
||||
geom_point() +
|
||||
labs(x = "Relative Week", y = "Share of Nonbot Commits", linetype = "Testing Event", color="Commit Author Affiliation") +
|
||||
scale_color_manual(values = affiliationColors,
|
||||
labels = c("other_commit_count" = "Unaffiliated", "wikimedia_commit_count" = "WMF Affiliated")) +
|
||||
scale_linetype_discrete(labels = c("enwiki testing (2012-12-12)", "wide testing (2013-04-25)")) +
|
||||
scale_color_discrete(labels = c("Unaffiliated", "Organizationally Affiliated")) +
|
||||
ggtitle("VisualEditor Nonbot Commit Share Around Opt-In Testing Events") +
|
||||
ggtitle("Share of Weekly VisualEditor Commits Surrounding Opt-in Testing Events") +
|
||||
theme_bw() +
|
||||
theme(legend.position = "top")
|
||||
commit_share_plot
|
||||
|
||||
ggsave(filename = "0305-ve-testing-share.png", plot = commit_share_plot, width = 12, height = 9, dpi = 800)
|
||||
|
17
mgaughan-rstudio-server_24539959.out
Normal file
17
mgaughan-rstudio-server_24539959.out
Normal file
@ -0,0 +1,17 @@
|
||||
1. SSH tunnel from your workstation using the following command:
|
||||
|
||||
ssh -N -L 8787:n3439:56597 mjilg@klone.hyak.uw.edu
|
||||
|
||||
and point your web browser to http://localhost:8787
|
||||
|
||||
2. log in to RStudio Server using the following credentials:
|
||||
|
||||
user: mjilg
|
||||
password: [REDACTED]
|
||||
|
||||
When done using RStudio Server, terminate the job by:
|
||||
|
||||
1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window)
|
||||
2. Issue the following command on the login node:
|
||||
|
||||
scancel -f 24539959
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -72,7 +72,7 @@ grouped_results <- results |>
|
||||
|
||||
plot <- grouped_results |>
|
||||
ggplot(aes(x=week,
|
||||
y=X5,
|
||||
y=X1,
|
||||
color=affil)) +
|
||||
geom_line()
|
||||
plot
|
Loading…
Reference in New Issue
Block a user