1
0

new plots and scripts for ww

This commit is contained in:
Matthew Gaughan 2025-03-05 21:43:06 -08:00
parent 28df3eb729
commit 7ec6a16597
16 changed files with 281 additions and 1983 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 46 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 95 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 93 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 102 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 103 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.1 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 133 KiB

View File

@@ -1,17 +1,17 @@
library(tidyverse) library(tidyverse)
entest_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/en-testing_0217_extensions_ve_weekly_commit_count_data.csv" entest_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/en-testing_0217_mediawiki_core_weekly_commit_count_data.csv"
entest_df <- read.csv(entest_fp, header = TRUE) |> mutate(rd_event = "en-testing") entest_df <- read.csv(entest_fp, header = TRUE) |> mutate(rd_event = "en-testing")
widetest_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/wide-testing_0217_extensions_ve_weekly_commit_count_data.csv" widetest_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/wide-testing_0217_mediawiki_core_weekly_commit_count_data.csv"
widetest_df <- read.csv(widetest_fp, header = TRUE) |> mutate(rd_event = "wide-testing") widetest_df <- read.csv(widetest_fp, header = TRUE) |> mutate(rd_event = "wide-testing")
event_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/event_0217_extensions_ve_weekly_commit_count_data.csv" event_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/event_0217_mediawiki_core_weekly_commit_count_data.csv"
event_df <- read.csv(event_fp, header = TRUE) |> mutate(rd_event = "default") event_df <- read.csv(event_fp, header = TRUE) |> mutate(rd_event = "default")
#input_df <- bind_rows(entest_df, widetest_df, event_df) #input_df <- bind_rows(entest_df, widetest_df, event_df)
#dropping the event (2013-07-01) from the modeling #dropping the event (2013-07-01) from the modeling
#input_df <- bind_rows(entest_df, widetest_df) input_df <- bind_rows(entest_df, widetest_df)
input_df <- event_df #input_df <- event_df
input_df <- input_df |> input_df <- input_df |>
mutate(nonbot_commit_count = commit_count - bot_commit_count)|> mutate(nonbot_commit_count = commit_count - bot_commit_count)|>
@@ -78,33 +78,41 @@ qqnorm(residuals(mlm))
res <- ranef(mlm) res <- ranef(mlm)
print(res) print(res)
texreg(mlm)
#final_long_df <- final_long_df |> #final_long_df <- final_long_df |>
# drop_na() # drop_na()
library(performance) library(performance)
library(texreg) library(texreg)
descdist(final_long_df$commit_share, discrete=FALSE)
#descdist(final_long_df$commit_share, discrete=FALSE)
wikimedia_long_df <- final_long_df |> wikimedia_long_df <- final_long_df |>
filter(commit_type == "wikimedia_commit_count")|> filter(commit_type == "wikimedia_commit_count")|>
drop_na() drop_na()
wikimedia_share_lm <- lm(commit_share ~ before_after*relative_week, #wikimedia_share_lm <- lm(commit_share ~ before_after*relative_week,
data=wikimedia_long_df) # data=wikimedia_long_df)
summary(wikimedia_share_lm) #summary(wikimedia_share_lm)
qqnorm(residuals(wikimedia_share_lm)) #qqnorm(residuals(wikimedia_share_lm))
wikimedia_share_lmer <- lmer(commit_share ~ before_after*relative_week +
(before_after*relative_week | rd_event),
data=wikimedia_long_df)
summary(wikimedia_share_lmer)
ranef(wikimedia_share_lmer)
texreg(wikimedia_share_lm) texreg(wikimedia_share_lm)
other_long_df <- final_long_df |> other_long_df <- final_long_df |>
filter(commit_type == "other_commit_count") |> filter(commit_type == "other_commit_count") |>
drop_na() drop_na()
#other_share_lmer <- lmer(commit_share ~ before_after*relative_week + other_share_lmer <- lmer(commit_share ~ before_after*relative_week +
# (1| rd_event), (1| rd_event),
# data=other_long_df) data=other_long_df)
other_share_lm <- lm(commit_share ~ before_after*relative_week, #other_share_lm <- lm(commit_share ~ before_after*relative_week,
data=other_long_df) # data=other_long_df)
summary(other_share_lm) summary(other_share_lmer)
qqnorm(residuals(other_share_lm)) qqnorm(residuals(other_share_lm))
texreg(other_share_lm) texreg(other_share_lm)

View File

@@ -0,0 +1,18 @@
1. SSH tunnel from your workstation using the following command:
ssh -N -L 8787:n3439:50869 mjilg@klone.hyak.uw.edu
and point your web browser to http://localhost:8787
2. Log in to RStudio Server using the following credentials:
user: mjilg
password: <REDACTED: this credential was committed in plain text; rotate it and do not store it in the repository>
When done using RStudio Server, terminate the job by:
1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window)
2. Issue the following command on the login node:
scancel -f 24552723
slurmstepd: error: *** JOB 24552723 ON n3439 CANCELLED AT 2025-03-05T20:22:35 ***

File diff suppressed because one or more lines are too long

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.3 KiB

File diff suppressed because one or more lines are too long

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.3 KiB

View File

@@ -55,10 +55,12 @@ seed = 9021000
# prevalence=~posix_timestamp, # prevalence=~posix_timestamp,
# verbose = TRUE) # verbose = TRUE)
plot(model) plot(model)
saveRDS(model, file = "text_analysis/case1/030125_ve_rfc_stm.rds") #saveRDS(model, file = "text_analysis/case1/030125_ve_rfc_stm.rds")
model <- readRDS(file = "text_analysis/case1/030125_ve_rfc_stm.rds")
labelTopics(model, topics = c(5, 4, 2, 3, 1), n = 10) labelTopics(model, topics = c(5, 4, 2, 3, 1), n = 10)
theta <- model$theta
expected_topic_proportions <- colMeans(theta)
results = data.frame(text = corpus_subset(rfc_corp, docnames(rfc_corp) %in% rfc_dfm_stm$meta$doc_id), date = rfc_dfm_stm$meta$posix_timestamp, model$theta) results = data.frame(text = corpus_subset(rfc_corp, docnames(rfc_corp) %in% rfc_dfm_stm$meta$doc_id), date = rfc_dfm_stm$meta$posix_timestamp, model$theta)

File diff suppressed because one or more lines are too long