junior-sheer/notebooks/_moved.qmd
2024-03-08 12:50:47 -06:00

258 lines
8.9 KiB
Plaintext

---
title: "Moved Accounts"
---
```{r}
library(tidyverse)
library(jsonlite)
library(arrow)
library(statnet)
library(modelsummary)
library(here)
options(arrow.skip_nul = TRUE)
jm <- as_tibble(fromJSON(here("data/joinmastodon.json")))
moved_accounts <- arrow::read_feather(here("data/scratch/individual_moved_accounts.feather")) %>%
filter(server != "mastodon.nfriedly.com") %>%
filter(server != "vivaldi.social")
source(here("code/helpers.R"))
accounts <- load_accounts(filt = FALSE)
jm_move_counts <- inner_join(moved_accounts, accounts, by=c("moved_server"="server", "moved_acct"="username")) %>%
filter(created_at >= as.Date("2023-06-01")) %>%
filter(server %in% jm$domain) %>% filter(moved_server %in% jm$domain) %>%
filter(server != moved_server) %>%
group_by(server, moved_server) %>% summarize(count = n()) %>% arrange(desc(count))
is_mastodon <- arrow::read_feather(here("data/nodeinfo-2024-01-31.feather")) %>%
filter(str_detect(data_string, '"name": "mastodon"')) %>% select(server)
activity <-
arrow::read_feather(here("data/scratch/activity.feather"),
col_select = c("server", "logins")) %>%
arrange(desc(logins))
metadata <- arrow::read_feather(here("data/scratch/metadata.feather")) %>%
drop_na() %>%
filter(server %in% is_mastodon$server) %>%
unnest(languages) %>%
distinct(server, .keep_all=T) #%>% filter(user_count >= 10)
```
```{r}
#| execute: false
# library(statnet)
# n = data.frame(
# from = c(2,3,4,4),
# to = c(1,1,1,3),
# count = c(1,2,3,1)
# )
#
# e <- network(n, matrix.type = "edgelist")
# e %v% "size" <- c(1,2,1,100)
#
# m2 <-
# ergm(
# e ~ sum +
# nodeocov("size", form = "sum") +
# diff("size", dir="h-t", pow = 1, form = "sum"),
# response = "count",
# reference = ~ Binomial(3),
# control=control.ergm(parallel=4, parallel.type="PSOCK")
# )
#
# summary(m2)
```
```{r}
build_network <- function(move_counts, metadata, activity_data) {
edgelist <- move_counts %>%
filter(server %in% activity$server) %>%
filter(moved_server %in% activity$server) %>%
filter(server %in% metadata$server) %>%
filter(moved_server %in% metadata$server) %>% #filter(server %in% jm$domain) %>% filter(moved_server %in% jm$domain) %>%
select(server, moved_server, count) #%>% slice_sample(n=500)
edgeNet <- network(edgelist, matrix.type = "edgelist", directed=TRUE, loops=FALSE, multiple=FALSE)
activity_data <- left_join((as_tibble(edgeNet %v% 'vertex.names') %>% rename(server = value)), activity, by="server")
metadata_data <- left_join((as_tibble(edgeNet %v% 'vertex.names') %>% rename(server = value)), metadata, by="server")
edgeNet %v% "last_week_users" <- log1p(activity_data$logins)
edgeNet %v% "accounts" <- log(metadata_data$user_count)
edgeNet %v% "single_user" <- (metadata_data$user_count == 1)
edgeNet %v% "dead_server" <- (activity_data$logins == 0)
edgeNet %v% "jm" <- edgeNet %v% 'vertex.names' %in% jm$domain
edgeNet %v% "approval_required" <- metadata_data$approval_required
edgeNet %v% "registrations" <- metadata_data$registrations
edgeNet %v% "description" <- metadata_data$description
edgeNet %v% "invites_enabled" <- metadata_data$invites_enabled
edgeNet %v% "language" <- metadata$languages
return(edgeNet)
}
run_network <- function(network) {
model <-
ergm(
network ~ sum + nonzero +
#diff("last_week_users", dir="h-t", pow = 0, form = "sum") +
diff("accounts", dir="h-t", pow = 0, form = "sum") + # Do people move to smaller servers?
nodeocov("accounts", form = "sum") + # Do servers with more accounts have more outflow?
nodeifactor("registrations", form = "sum") + # Do servers with open registration get more inflow?
nodematch("language", form = "sum"),
response = "count",
reference = ~ Binomial(5),
control = control.ergm(MCMLE.maxit = 100, MCMC.effectiveSize = 50)
)
return(model)
}
move_counts.early <- inner_join(moved_accounts, accounts, by=c("server"="server", "account"="username")) %>%
filter(server %in% is_mastodon$server) %>%
filter(moved_server %in% is_mastodon$server) %>%
filter(created_at >= as.Date("2022-04-01")) %>%
filter(created_at < as.Date("2022-05-01")) %>%
filter(server != moved_server) %>%
group_by(server, moved_server) %>% summarize(count = n()) %>% arrange(desc(count)) %>%
ungroup()
move_counts.late <- inner_join(moved_accounts, accounts, by=c("moved_server"="server", "moved_acct"="username")) %>%
filter(server %in% is_mastodon$server) %>%
filter(moved_server %in% is_mastodon$server) %>%
filter(created_at >= as.Date("2023-10-20")) %>%
filter(server != moved_server) %>%
group_by(server, moved_server) %>% summarize(count = n()) %>% arrange(desc(count)) %>%
ungroup()
move_counts.late2 <- inner_join(moved_accounts, accounts, by=c("server"="server", "account"="username")) %>%
filter(server %in% is_mastodon$server) %>%
filter(moved_server %in% is_mastodon$server) %>%
filter(created_at >= as.Date("2023-10-20")) %>%
filter(server != moved_server) %>%
group_by(server, moved_server) %>% summarize(count = n()) %>% arrange(desc(count)) %>%
ungroup()
edgeNet.early <- build_network(move_counts.early, metadata, activity_data)
edgeNet.late <- build_network(move_counts.late2, metadata, activity_data)
```
```{r}
library(GGally)
edgeNet.early %e% "lcount" <- as.integer(log(edgeNet.early %e% "count")) + 1
edgeNet.early %e% "pcount" <- edgeNet.early %e% "lcount" / max(edgeNet.early %e% "lcount")
ggnet2(
edgeNet.early,
edge.size="lcount",
color = "black",
node.alpha = 0.75,
edge.alpha = 0.25,
edge.label.alpha = "pcount",#0.5,
max_size = 10,
size = "indegree",
#size = "degree",
#mode = "target",
#size.min = 1.1,
arrow.gap = 0.01, arrow.size = 5, arrow.type = "open"
) + coord_equal() + guides(color = FALSE, size = FALSE)
```
```{r}
library(GGally)
edgeNet.late %e% "lcount" <- as.integer(log(edgeNet.late %e% "count")) + 1
edgeNet.late %e% "pcount" <- edgeNet.late %e% "lcount" / max(edgeNet.late %e% "lcount")
ggnet2(
edgeNet.late,
edge.size="lcount",
color = "black",
node.alpha = 0.75,
edge.alpha = 0.25,
edge.label.alpha = "pcount",#0.5,
max_size = 10,
size = "indegree",
#size = "degree",
#mode = "target",
#size.min = 1.1,
arrow.gap = 0.01, arrow.size = 5, arrow.type = "open"
) + coord_equal() + guides(color = FALSE, size = FALSE)
```
```{r}
#| label: tbl-ergm
#| tbl-cap: ERGM model output
#| cache: true
#model.early <- run_network(edgeNet.early)
#model.late <- run_network(edgeNet.late)
#save(model.early, file = here("data/scratch/ergm-model-early.rda"))
#save(model.late, file = here("data/scratch/ergm-model-late.rda"))
load(file = here("data/scratch/ergm-model-early.rda"))
load(file = here("data/scratch/ergm-model-late.rda"))
library(kableExtra)
modelsummary(
list("Coef." = model.early, "Std.Error" = model.early, "Coef." = model.late, "Std.Error" = model.late),
estimate = c("{estimate}", "{stars}{std.error}", "{estimate}", "{stars}{std.error}"),
statistic = NULL,
gof_omit = ".*",
coef_rename = c(
"sum" = "(Sum)",
"diff.sum0.h-t.accounts" = "Smaller server",
"nodeocov.sum.accounts" = "Server size (outgoing)",
"nodeifactor.sum.registrations.TRUE" = "Open registrations (incoming)",
"nodematch.sum.language" = "Languages match"
),
align="lrrrr",
stars = c('*' = .05, '**' = 0.01, '***' = .001),
) %>%
add_header_above(c(" " = 1, "Model A" = 2, "Model B" = 2))
```
```{r}
#| eval: false
jm2023 <- as_tibble(fromJSON(here("data/joinmastodon-2023-08-25.json")))
move_counts.stable <- inner_join(moved_accounts, accounts, by=c("moved_server"="server", "moved_acct"="username")) %>%
filter(server %in% is_mastodon$server) %>%
filter(moved_server %in% is_mastodon$server) %>%
filter(created_at >= as.Date("2023-05-01")) %>%
filter(created_at < as.Date("2023-08-01")) %>%
filter(server != moved_server) %>%
group_by(server, moved_server) %>% summarize(count = n()) %>% arrange(desc(count)) %>%
ungroup() %>%
filter(server %in% jm2023$domain) %>%
filter(moved_server %in% jm2023$domain)
run_network2 <- function(network) {
model <-
ergm(
network ~ sum + nonzero +
#diff("last_week_users", dir="h-t", pow = 0, form = "sum") +
nodeocovar(center=TRUE,transform="sqrt") +
diff("accounts", dir="h-t", pow = 0, form = "sum") + # Do people move to smaller servers?
nodeocov("accounts", form = "sum") + # Do servers with more accounts have more outflow?
nodeifactor("registrations", form = "sum") + # Do servers with open registration get more inflow?
nodematch("language", form = "sum"),
response = "count",
reference = ~ Binomial(5),
control = control.ergm(MCMLE.maxit = 100, MCMC.effectiveSize = 50)
)
return(model)
}
move_counts.stable
edgeNet.stable <- build_network(move_counts.stable, metadata, activity_data)
model.stable <- run_network2(edgeNet.stable)
save(model.stable, file = here("data/scratch/ergm-model-stable.rda"))
```