# Weekly commit-count modelling script.
#
# Reads three weekly commit-count CSVs, reshapes the selected data to long
# format (one row per week and commit type), fits a negative-binomial mixed
# model on commit counts, and then fits a linear mixed model on the
# commit share of the "wikimedia" commit type.

# Packages ----
# All packages are attached up front so that every function used below
# (notably texreg()) is available before its first call.
library(tidyverse)
library(dplyr)
library(MASS) # masks dplyr::select(); select is namespaced below
library(lme4)
library(rdd)
library(fitdistrplus)
library(broom.mixed)
library(ggplot2)
library(performance)
library(texreg)

# Input data ----
data_dir <- "/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1"

entest_fp <- file.path(
  data_dir,
  "en-testing_0312_extensions_ve_weekly_commit_count_data.csv"
)
entest_df <- read.csv(entest_fp, header = TRUE) |>
  mutate(rd_event = "en-testing")

widetest_fp <- file.path(
  data_dir,
  "wide-testing_0312_extensions_ve_weekly_commit_count_data.csv"
)
widetest_df <- read.csv(widetest_fp, header = TRUE) |>
  mutate(rd_event = "wide-testing")

event_fp <- file.path(
  data_dir,
  "event_0312_extensions_ve_weekly_commit_count_data.csv"
)
event_df <- read.csv(event_fp, header = TRUE) |>
  mutate(rd_event = "default")

#input_df <- bind_rows(entest_df, widetest_df, event_df)
#dropping the event (2013-07-01) from the modeling
#input_df <- bind_rows(entest_df, widetest_df)
input_df <- event_df

# Derived commit counts ----
# mutate() evaluates its arguments in order, so other_commit_count is
# computed from the ORIGINAL wikimedia_commit_count before that column is
# overwritten with the combined wikimedia + mediawiki_dev + wikia total.
input_df <- input_df |>
  mutate(
    nonbot_commit_count = commit_count - bot_commit_count,
    other_commit_count = nonbot_commit_count - mediawiki_dev_commit_count -
      wikia_commit_count - wikimedia_commit_count,
    wikimedia_commit_count = wikimedia_commit_count +
      mediawiki_dev_commit_count + wikia_commit_count
  ) |>
  dplyr::select(-mediawiki_dev_commit_count, -wikia_commit_count)

# Long (multilevel-model) format ----
#get into mlm format
long_df <- input_df |>
  tidyr::pivot_longer(
    cols = c(other_commit_count, wikimedia_commit_count, unaff_ft_commit_count),
    names_to = "commit_type",
    values_to = "lengthened_commit_count"
  )

intermediate_long_df <- long_df |>
  mutate(
    commit_share = lengthened_commit_count / (nonbot_commit_count),
    log_commits = log1p(lengthened_commit_count),
    scaled_long_commits = lengthened_commit_count / 10
  )

# Quick dispersion check on the count outcome.
var(intermediate_long_df$lengthened_commit_count)
mean(intermediate_long_df$lengthened_commit_count)
median(intermediate_long_df$lengthened_commit_count)

#' Imbens-Kalyanaraman bandwidth for the commit-count discontinuity
#'
#' Calls `rdd::IKbandwidth()` with `relative_week` as the running variable,
#' `lengthened_commit_count` as the outcome, a cutpoint of 0 and a
#' triangular kernel.
#'
#' @param df A data frame with columns `relative_week` and
#'   `lengthened_commit_count`.
#' @return The value returned by `IKbandwidth()`, or `NA_real_` (with a
#'   warning carrying the original error message) if the call fails.
get_optimal_bandwidth <- function(df) {
  tryCatch(
    IKbandwidth(
      df$relative_week,
      df$lengthened_commit_count,
      cutpoint = 0,
      verbose = FALSE,
      kernel = "triangular"
    ),
    error = function(e) {
      warning(
        "IKbandwidth failed: ", conditionMessage(e),
        call. = FALSE
      )
      NA_real_
    }
  )
}

# unaff_ft_commit_count is excluded from everything that follows.
intermediate_long_df <- intermediate_long_df |>
  filter(commit_type != "unaff_ft_commit_count")

optimal_bandwidth <- get_optimal_bandwidth(intermediate_long_df)

# Analysis window: relative weeks within +/- window_num of the cutpoint.
# (unaff_ft_commit_count rows were already removed above.)
window_num <- 10
final_long_df <- intermediate_long_df |>
  filter(relative_week >= (-window_num) & relative_week <= window_num)

descdist(final_long_df$lengthened_commit_count, discrete = FALSE)
#start_values <- list(shape1 = 1, shape2 = 1)
#fit <- MASS::fitdistr(as.numeric(long_df$lengthened_commit_count), "negative binomial")
#print(fit)

# Negative-binomial mixed model on commit counts ----
#NOTE should not run if you've already dropped NA
mlm <- glmer.nb(
  lengthened_commit_count ~ before_after * relative_week +
    (before_after * relative_week | commit_type),
  control = glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5)),
  nAGQ = 0,
  data = final_long_df
)
#(before_after*relative_week|rd_event)
saveRDS(mlm, file = "0312_ve_ve_event_commits_mlm.rds")
#mlm <- readRDS("commit_analysis/case1/0312_core_ve_testing_commits_mlm.rds")
summary(mlm)
qqnorm(residuals(mlm))
res <- ranef(mlm)
print(res)

# Conditional modes of the random effects, with confidence intervals.
condvals <- broom.mixed::tidy(mlm, effects = "ran_vals", conf.int = TRUE)
glmer_ranef_ba <- condvals
write.csv(glmer_ranef_ba, "0312_ve_ve_event_ba_ranefs.csv")
texreg(mlm)

#final_long_df <- final_long_df |>
#  drop_na()

# Commit-share model: wikimedia ----
#descdist(final_long_df$commit_share, discrete=FALSE)
wikimedia_long_df <- final_long_df |>
  filter(commit_type == "wikimedia_commit_count") |>
  drop_na()
#wikimedia_share_lm <- lm(commit_share ~ before_after*relative_week,
#                        data=wikimedia_long_df)
#summary(wikimedia_share_lm)
#qqnorm(residuals(wikimedia_share_lm))

# NOTE(review): with `input_df <- event_df` above, rd_event only takes the
# value "default". lme4's default grouping-level check (check.nlev.gtr.1)
# is expected to reject a grouping factor with a single level -- confirm
# whether the bind_rows() input or the plain lm() variant is intended here.
wikimedia_share_lmer <- lmer(
  commit_share ~ before_after * relative_week +
    (before_after * relative_week | rd_event),
  data = wikimedia_long_df
)
summary(wikimedia_share_lmer)
# Random effects and regression table for the wikimedia commit-share model.
# The plain lm() fit (wikimedia_share_lm) is commented out earlier in the
# script, so the lmer fit is the object that actually exists here.
ranef(wikimedia_share_lmer)
texreg(wikimedia_share_lmer)

# Commit-share model: other ----
# Same specification as the fixed part of the wikimedia model, but with only
# a random intercept per rd_event.
other_long_df <- final_long_df |>
  filter(commit_type == "other_commit_count") |>
  drop_na()

# NOTE(review): if rd_event has a single level in final_long_df, lme4's
# default grouping-level check is expected to reject this fit -- confirm
# which input data set is intended.
other_share_lmer <- lmer(
  commit_share ~ before_after * relative_week + (1 | rd_event),
  data = other_long_df
)
#other_share_lm <- lm(commit_share ~ before_after*relative_week,
#                      data=other_long_df)
summary(other_share_lmer)

# Diagnostics and table use the fitted lmer object; other_share_lm is only
# defined in the commented-out code above and does not exist at runtime.
qqnorm(residuals(other_share_lmer))
texreg(other_share_lmer)