new plots and scripts for ww
Before Width: | Height: | Size: 46 KiB |
Before Width: | Height: | Size: 95 KiB |
Before Width: | Height: | Size: 93 KiB |
Before Width: | Height: | Size: 102 KiB |
Before Width: | Height: | Size: 103 KiB |
BIN
commit_analysis/case1/0305-ve-testing-share.png
Normal file
After Width: | Height: | Size: 1.1 MiB |
Before Width: | Height: | Size: 1.1 MiB |
Before Width: | Height: | Size: 133 KiB |
@ -1,17 +1,17 @@
|
||||
library(tidyverse)
|
||||
entest_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/en-testing_0217_extensions_ve_weekly_commit_count_data.csv"
|
||||
entest_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/en-testing_0217_mediawiki_core_weekly_commit_count_data.csv"
|
||||
entest_df <- read.csv(entest_fp, header = TRUE) |> mutate(rd_event = "en-testing")
|
||||
|
||||
widetest_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/wide-testing_0217_extensions_ve_weekly_commit_count_data.csv"
|
||||
widetest_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/wide-testing_0217_mediawiki_core_weekly_commit_count_data.csv"
|
||||
widetest_df <- read.csv(widetest_fp, header = TRUE) |> mutate(rd_event = "wide-testing")
|
||||
|
||||
event_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/event_0217_extensions_ve_weekly_commit_count_data.csv"
|
||||
event_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/event_0217_mediawiki_core_weekly_commit_count_data.csv"
|
||||
event_df <- read.csv(event_fp, header = TRUE) |> mutate(rd_event = "default")
|
||||
|
||||
#input_df <- bind_rows(entest_df, widetest_df, event_df)
|
||||
#dropping the event (2013-07-01) from the modeling
|
||||
#input_df <- bind_rows(entest_df, widetest_df)
|
||||
input_df <- event_df
|
||||
input_df <- bind_rows(entest_df, widetest_df)
|
||||
#input_df <- event_df
|
||||
|
||||
input_df <- input_df |>
|
||||
mutate(nonbot_commit_count = commit_count - bot_commit_count)|>
|
||||
@ -78,33 +78,41 @@ qqnorm(residuals(mlm))
|
||||
res <- ranef(mlm)
|
||||
print(res)
|
||||
|
||||
texreg(mlm)
|
||||
#final_long_df <- final_long_df |>
|
||||
# drop_na()
|
||||
library(performance)
|
||||
library(texreg)
|
||||
|
||||
descdist(final_long_df$commit_share, discrete=FALSE)
|
||||
|
||||
|
||||
#descdist(final_long_df$commit_share, discrete=FALSE)
|
||||
|
||||
wikimedia_long_df <- final_long_df |>
|
||||
filter(commit_type == "wikimedia_commit_count")|>
|
||||
drop_na()
|
||||
|
||||
wikimedia_share_lm <- lm(commit_share ~ before_after*relative_week,
|
||||
#wikimedia_share_lm <- lm(commit_share ~ before_after*relative_week,
|
||||
# data=wikimedia_long_df)
|
||||
#summary(wikimedia_share_lm)
|
||||
#qqnorm(residuals(wikimedia_share_lm))
|
||||
wikimedia_share_lmer <- lmer(commit_share ~ before_after*relative_week +
|
||||
(before_after*relative_week | rd_event),
|
||||
data=wikimedia_long_df)
|
||||
summary(wikimedia_share_lm)
|
||||
qqnorm(residuals(wikimedia_share_lm))
|
||||
|
||||
summary(wikimedia_share_lmer)
|
||||
ranef(wikimedia_share_lmer)
|
||||
texreg(wikimedia_share_lm)
|
||||
|
||||
other_long_df <- final_long_df |>
|
||||
filter(commit_type == "other_commit_count") |>
|
||||
drop_na()
|
||||
#other_share_lmer <- lmer(commit_share ~ before_after*relative_week +
|
||||
# (1| rd_event),
|
||||
# data=other_long_df)
|
||||
other_share_lm <- lm(commit_share ~ before_after*relative_week,
|
||||
other_share_lmer <- lmer(commit_share ~ before_after*relative_week +
|
||||
(1| rd_event),
|
||||
data=other_long_df)
|
||||
summary(other_share_lm)
|
||||
#other_share_lm <- lm(commit_share ~ before_after*relative_week,
|
||||
# data=other_long_df)
|
||||
summary(other_share_lmer)
|
||||
qqnorm(residuals(other_share_lm))
|
||||
|
||||
texreg(other_share_lm)
|
||||
|
18
mgaughan-rstudio-server_24552723.out
Normal file
@ -0,0 +1,18 @@
|
||||
1. SSH tunnel from your workstation using the following command:
|
||||
|
||||
ssh -N -L 8787:n3439:50869 mjilg@klone.hyak.uw.edu
|
||||
|
||||
and point your web browser to http://localhost:8787
|
||||
|
||||
2. log in to RStudio Server using the following credentials:
|
||||
|
||||
user: mjilg
|
||||
password: vOFatg5CSDbh5vGq6k9d
|
||||
|
||||
When done using RStudio Server, terminate the job by:
|
||||
|
||||
1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window)
|
||||
2. Issue the following command on the login node:
|
||||
|
||||
scancel -f 24552723
|
||||
slurmstepd: error: *** JOB 24552723 ON n3439 CANCELLED AT 2025-03-05T20:22:35 ***
|
After Width: | Height: | Size: 2.3 KiB |
BIN
text_analysis/case1/030525_ve_phab_comments.png
Normal file
After Width: | Height: | Size: 2.3 KiB |
@ -55,10 +55,12 @@ seed = 9021000
|
||||
# prevalence=~posix_timestamp,
|
||||
# verbose = TRUE)
|
||||
plot(model)
|
||||
saveRDS(model, file = "text_analysis/case1/030125_ve_rfc_stm.rds")
|
||||
#saveRDS(model, file = "text_analysis/case1/030125_ve_rfc_stm.rds")
|
||||
model <- readRDS(file = "text_analysis/case1/030125_ve_rfc_stm.rds")
|
||||
|
||||
labelTopics(model, topics = c(5, 4, 2, 3, 1), n = 10)
|
||||
|
||||
theta <- model$theta
|
||||
expected_topic_proportions <- colMeans(theta)
|
||||
|
||||
results = data.frame(text = corpus_subset(rfc_corp, docnames(rfc_corp) %in% rfc_dfm_stm$meta$doc_id), date = rfc_dfm_stm$meta$posix_timestamp, model$theta)
|
||||
|
||||
|