new plots and scripts for ww

2025-03-05 21:43:06 -08:00 · 2025-03-05 21:43:06 -08:00 · 7ec6a16597
commit 7ec6a16597
parent 28df3eb729
16 changed files with 281 additions and 1983 deletions
--- a/commit_analysis/case1/0301-ve-testing-commit-plot.png
+++ b/commit_analysis/case1/0301-ve-testing-commit-plot.png
--- a/commit_analysis/case1/030125_ve-testing-share-ba.png
+++ b/commit_analysis/case1/030125_ve-testing-share-ba.png
--- a/commit_analysis/case1/030125_ve_testing_commits_ba_plot.png
+++ b/commit_analysis/case1/030125_ve_testing_commits_ba_plot.png
--- a/commit_analysis/case1/030225_core_commit_count_ba.png
+++ b/commit_analysis/case1/030225_core_commit_count_ba.png
--- a/commit_analysis/case1/030225_core_share_ba.png
+++ b/commit_analysis/case1/030225_core_share_ba.png
--- a/commit_analysis/case1/0305-ve-testing-share.png
+++ b/commit_analysis/case1/0305-ve-testing-share.png
--- a/commit_analysis/case1/0305-ve-total-commits.png
+++ b/commit_analysis/case1/0305-ve-total-commits.png
--- a/commit_analysis/case1/030525_ve_commit_plot.png
+++ b/commit_analysis/case1/030525_ve_commit_plot.png
--- a/commit_analysis/matched_rdd_models.R
+++ b/commit_analysis/matched_rdd_models.R
@@ -1,17 +1,17 @@
 library(tidyverse)
-entest_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/en-testing_0217_extensions_ve_weekly_commit_count_data.csv"
+entest_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/en-testing_0217_mediawiki_core_weekly_commit_count_data.csv"
 entest_df <- read.csv(entest_fp, header = TRUE) |> mutate(rd_event = "en-testing")
-widetest_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/wide-testing_0217_extensions_ve_weekly_commit_count_data.csv"
+widetest_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/wide-testing_0217_mediawiki_core_weekly_commit_count_data.csv"
 widetest_df <- read.csv(widetest_fp, header = TRUE) |> mutate(rd_event = "wide-testing")
-event_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/event_0217_extensions_ve_weekly_commit_count_data.csv"
+event_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/event_0217_mediawiki_core_weekly_commit_count_data.csv"
 event_df <- read.csv(event_fp, header = TRUE) |> mutate(rd_event = "default")
 #input_df <- bind_rows(entest_df, widetest_df, event_df)
 #dropping the event (2013-07-01) from the modeling
-#input_df <- bind_rows(entest_df, widetest_df)
+input_df <- bind_rows(entest_df, widetest_df)
-input_df <- event_df
+#input_df <- event_df
 input_df <- input_df |>
  mutate(nonbot_commit_count = commit_count - bot_commit_count)|>
@@ -78,33 +78,41 @@ qqnorm(residuals(mlm))
 res <- ranef(mlm)
 print(res)
 texreg(mlm)
 #final_long_df <- final_long_df |>
 #  drop_na()
 library(performance)
 library(texreg)
-descdist(final_long_df$commit_share, discrete=FALSE)
+
 #descdist(final_long_df$commit_share, discrete=FALSE)
 wikimedia_long_df <- final_long_df |>
  filter(commit_type == "wikimedia_commit_count")|>
  drop_na()
-wikimedia_share_lm <- lm(commit_share ~ before_after*relative_week,
+#wikimedia_share_lm <- lm(commit_share ~ before_after*relative_week,
-                         data=wikimedia_long_df)
+#                         data=wikimedia_long_df)
-summary(wikimedia_share_lm)
+#summary(wikimedia_share_lm)
-qqnorm(residuals(wikimedia_share_lm))
+#qqnorm(residuals(wikimedia_share_lm))
 wikimedia_share_lmer <- lmer(commit_share ~ before_after*relative_week + 
                               (before_after*relative_week | rd_event),
                             data=wikimedia_long_df)
 summary(wikimedia_share_lmer)
 ranef(wikimedia_share_lmer)
 texreg(wikimedia_share_lm)
 other_long_df <- final_long_df |>
  filter(commit_type == "other_commit_count") |>
  drop_na()
-#other_share_lmer <- lmer(commit_share ~ before_after*relative_week +
+other_share_lmer <- lmer(commit_share ~ before_after*relative_week +
-#                         (1| rd_event),
+                         (1| rd_event),
-#                           data=other_long_df)
+                           data=other_long_df)
-other_share_lm <- lm(commit_share ~ before_after*relative_week, 
+#other_share_lm <- lm(commit_share ~ before_after*relative_week, 
-                     data=other_long_df)
+#                     data=other_long_df)
-summary(other_share_lm)
+summary(other_share_lmer)
 qqnorm(residuals(other_share_lm))
 texreg(other_share_lm)
--- a/mgaughan-rstudio-server_24552723.out
+++ b/mgaughan-rstudio-server_24552723.out
@@ -0,0 +1,18 @@
 1. SSH tunnel from your workstation using the following command:
   ssh -N -L 8787:n3439:50869 mjilg@klone.hyak.uw.edu
   and point your web browser to http://localhost:8787
 2. log in to RStudio Server using the following credentials:
   user: mjilg
   password: vOFatg5CSDbh5vGq6k9d
 When done using RStudio Server, terminate the job by:
 1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window)
 2. Issue the following command on the login node:
      scancel -f 24552723
 slurmstepd: error: *** JOB 24552723 ON n3439 CANCELLED AT 2025-03-05T20:22:35 ***
--- a/text_analysis/.ipynb_checkpoints/ve_dependency-checkpoint.ipynb
+++ b/text_analysis/.ipynb_checkpoints/ve_dependency-checkpoint.ipynb
--- a/text_analysis/case1/.ipynb_checkpoints/030525_ve_phab_comments-checkpoint.png
+++ b/text_analysis/case1/.ipynb_checkpoints/030525_ve_phab_comments-checkpoint.png
--- a/text_analysis/case1/.ipynb_checkpoints/ve_dependency-checkpoint.ipynb
+++ b/text_analysis/case1/.ipynb_checkpoints/ve_dependency-checkpoint.ipynb
--- a/text_analysis/case1/030525_ve_phab_comments.png
+++ b/text_analysis/case1/030525_ve_phab_comments.png
--- a/text_analysis/case1/case1_stm.R
+++ b/text_analysis/case1/case1_stm.R
@@ -55,10 +55,12 @@ seed = 9021000
 #            prevalence=~posix_timestamp,
 #            verbose = TRUE)
 plot(model)
-saveRDS(model, file = "text_analysis/case1/030125_ve_rfc_stm.rds")
+#saveRDS(model, file = "text_analysis/case1/030125_ve_rfc_stm.rds")
 model <- readRDS(file = "text_analysis/case1/030125_ve_rfc_stm.rds")
 labelTopics(model, topics = c(5, 4, 2, 3, 1), n = 10)
-
+theta <- model$theta
 expected_topic_proportions <- colMeans(theta)
 results = data.frame(text = corpus_subset(rfc_corp, docnames(rfc_corp) %in% rfc_dfm_stm$meta$doc_id), date =  rfc_dfm_stm$meta$posix_timestamp, model$theta)
--- a/text_analysis/case1/ve_dependency.ipynb
+++ b/text_analysis/case1/ve_dependency.ipynb