new plots and scripts for ww
Before Width: | Height: | Size: 46 KiB |
Before Width: | Height: | Size: 95 KiB |
Before Width: | Height: | Size: 93 KiB |
Before Width: | Height: | Size: 102 KiB |
Before Width: | Height: | Size: 103 KiB |
BIN
commit_analysis/case1/0305-ve-testing-share.png
Normal file
After Width: | Height: | Size: 1.1 MiB |
Before Width: | Height: | Size: 1.1 MiB |
Before Width: | Height: | Size: 133 KiB |
@ -1,17 +1,17 @@
|
|||||||
library(tidyverse)
|
library(tidyverse)
|
||||||
entest_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/en-testing_0217_extensions_ve_weekly_commit_count_data.csv"
|
entest_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/en-testing_0217_mediawiki_core_weekly_commit_count_data.csv"
|
||||||
entest_df <- read.csv(entest_fp, header = TRUE) |> mutate(rd_event = "en-testing")
|
entest_df <- read.csv(entest_fp, header = TRUE) |> mutate(rd_event = "en-testing")
|
||||||
|
|
||||||
widetest_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/wide-testing_0217_extensions_ve_weekly_commit_count_data.csv"
|
widetest_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/wide-testing_0217_mediawiki_core_weekly_commit_count_data.csv"
|
||||||
widetest_df <- read.csv(widetest_fp, header = TRUE) |> mutate(rd_event = "wide-testing")
|
widetest_df <- read.csv(widetest_fp, header = TRUE) |> mutate(rd_event = "wide-testing")
|
||||||
|
|
||||||
event_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/event_0217_extensions_ve_weekly_commit_count_data.csv"
|
event_fp <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/event_0217_mediawiki_core_weekly_commit_count_data.csv"
|
||||||
event_df <- read.csv(event_fp, header = TRUE) |> mutate(rd_event = "default")
|
event_df <- read.csv(event_fp, header = TRUE) |> mutate(rd_event = "default")
|
||||||
|
|
||||||
#input_df <- bind_rows(entest_df, widetest_df, event_df)
|
#input_df <- bind_rows(entest_df, widetest_df, event_df)
|
||||||
#dropping the event (2013-07-01) from the modeling
|
#dropping the event (2013-07-01) from the modeling
|
||||||
#input_df <- bind_rows(entest_df, widetest_df)
|
input_df <- bind_rows(entest_df, widetest_df)
|
||||||
input_df <- event_df
|
#input_df <- event_df
|
||||||
|
|
||||||
input_df <- input_df |>
|
input_df <- input_df |>
|
||||||
mutate(nonbot_commit_count = commit_count - bot_commit_count)|>
|
mutate(nonbot_commit_count = commit_count - bot_commit_count)|>
|
||||||
@ -78,33 +78,41 @@ qqnorm(residuals(mlm))
|
|||||||
res <- ranef(mlm)
|
res <- ranef(mlm)
|
||||||
print(res)
|
print(res)
|
||||||
|
|
||||||
|
texreg(mlm)
|
||||||
#final_long_df <- final_long_df |>
|
#final_long_df <- final_long_df |>
|
||||||
# drop_na()
|
# drop_na()
|
||||||
library(performance)
|
library(performance)
|
||||||
library(texreg)
|
library(texreg)
|
||||||
|
|
||||||
descdist(final_long_df$commit_share, discrete=FALSE)
|
|
||||||
|
|
||||||
|
#descdist(final_long_df$commit_share, discrete=FALSE)
|
||||||
|
|
||||||
wikimedia_long_df <- final_long_df |>
|
wikimedia_long_df <- final_long_df |>
|
||||||
filter(commit_type == "wikimedia_commit_count")|>
|
filter(commit_type == "wikimedia_commit_count")|>
|
||||||
drop_na()
|
drop_na()
|
||||||
|
|
||||||
wikimedia_share_lm <- lm(commit_share ~ before_after*relative_week,
|
#wikimedia_share_lm <- lm(commit_share ~ before_after*relative_week,
|
||||||
data=wikimedia_long_df)
|
# data=wikimedia_long_df)
|
||||||
summary(wikimedia_share_lm)
|
#summary(wikimedia_share_lm)
|
||||||
qqnorm(residuals(wikimedia_share_lm))
|
#qqnorm(residuals(wikimedia_share_lm))
|
||||||
|
wikimedia_share_lmer <- lmer(commit_share ~ before_after*relative_week +
|
||||||
|
(before_after*relative_week | rd_event),
|
||||||
|
data=wikimedia_long_df)
|
||||||
|
|
||||||
|
summary(wikimedia_share_lmer)
|
||||||
|
ranef(wikimedia_share_lmer)
|
||||||
texreg(wikimedia_share_lm)
|
texreg(wikimedia_share_lm)
|
||||||
|
|
||||||
other_long_df <- final_long_df |>
|
other_long_df <- final_long_df |>
|
||||||
filter(commit_type == "other_commit_count") |>
|
filter(commit_type == "other_commit_count") |>
|
||||||
drop_na()
|
drop_na()
|
||||||
#other_share_lmer <- lmer(commit_share ~ before_after*relative_week +
|
other_share_lmer <- lmer(commit_share ~ before_after*relative_week +
|
||||||
# (1| rd_event),
|
(1| rd_event),
|
||||||
# data=other_long_df)
|
data=other_long_df)
|
||||||
other_share_lm <- lm(commit_share ~ before_after*relative_week,
|
#other_share_lm <- lm(commit_share ~ before_after*relative_week,
|
||||||
data=other_long_df)
|
# data=other_long_df)
|
||||||
summary(other_share_lm)
|
summary(other_share_lmer)
|
||||||
qqnorm(residuals(other_share_lm))
|
qqnorm(residuals(other_share_lm))
|
||||||
|
|
||||||
texreg(other_share_lm)
|
texreg(other_share_lm)
|
||||||
|
18
mgaughan-rstudio-server_24552723.out
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
1. SSH tunnel from your workstation using the following command:
|
||||||
|
|
||||||
|
ssh -N -L 8787:n3439:50869 mjilg@klone.hyak.uw.edu
|
||||||
|
|
||||||
|
and point your web browser to http://localhost:8787
|
||||||
|
|
||||||
|
2. log in to RStudio Server using the following credentials:
|
||||||
|
|
||||||
|
user: mjilg
|
||||||
|
password: vOFatg5CSDbh5vGq6k9d
|
||||||
|
|
||||||
|
When done using RStudio Server, terminate the job by:
|
||||||
|
|
||||||
|
1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window)
|
||||||
|
2. Issue the following command on the login node:
|
||||||
|
|
||||||
|
scancel -f 24552723
|
||||||
|
slurmstepd: error: *** JOB 24552723 ON n3439 CANCELLED AT 2025-03-05T20:22:35 ***
|
After Width: | Height: | Size: 2.3 KiB |
BIN
text_analysis/case1/030525_ve_phab_comments.png
Normal file
After Width: | Height: | Size: 2.3 KiB |
@ -55,10 +55,12 @@ seed = 9021000
|
|||||||
# prevalence=~posix_timestamp,
|
# prevalence=~posix_timestamp,
|
||||||
# verbose = TRUE)
|
# verbose = TRUE)
|
||||||
plot(model)
|
plot(model)
|
||||||
saveRDS(model, file = "text_analysis/case1/030125_ve_rfc_stm.rds")
|
#saveRDS(model, file = "text_analysis/case1/030125_ve_rfc_stm.rds")
|
||||||
|
model <- readRDS(file = "text_analysis/case1/030125_ve_rfc_stm.rds")
|
||||||
|
|
||||||
labelTopics(model, topics = c(5, 4, 2, 3, 1), n = 10)
|
labelTopics(model, topics = c(5, 4, 2, 3, 1), n = 10)
|
||||||
|
theta <- model$theta
|
||||||
|
expected_topic_proportions <- colMeans(theta)
|
||||||
|
|
||||||
results = data.frame(text = corpus_subset(rfc_corp, docnames(rfc_corp) %in% rfc_dfm_stm$meta$doc_id), date = rfc_dfm_stm$meta$posix_timestamp, model$theta)
|
results = data.frame(text = corpus_subset(rfc_corp, docnames(rfc_corp) %in% rfc_dfm_stm$meta$doc_id), date = rfc_dfm_stm$meta$posix_timestamp, model$theta)
|
||||||
|
|
||||||
|