258 lines
8.9 KiB
Plaintext
258 lines
8.9 KiB
Plaintext
---
|
|
title: "Moved Accounts"
|
|
---
|
|
|
|
```{r}
|
|
library(tidyverse)
|
|
library(jsonlite)
|
|
library(arrow)
|
|
library(statnet)
|
|
library(modelsummary)
|
|
library(here)
|
|
|
|
options(arrow.skip_nul = TRUE)
|
|
|
|
jm <- as_tibble(fromJSON(here("data/joinmastodon.json")))
|
|
|
|
moved_accounts <- arrow::read_feather(here("data/scratch/individual_moved_accounts.feather")) %>%
|
|
filter(server != "mastodon.nfriedly.com") %>%
|
|
filter(server != "vivaldi.social")
|
|
|
|
source(here("code/helpers.R"))
|
|
accounts <- load_accounts(filt = FALSE)
|
|
|
|
jm_move_counts <- inner_join(moved_accounts, accounts, by=c("moved_server"="server", "moved_acct"="username")) %>%
|
|
filter(created_at >= as.Date("2023-06-01")) %>%
|
|
filter(server %in% jm$domain) %>% filter(moved_server %in% jm$domain) %>%
|
|
filter(server != moved_server) %>%
|
|
group_by(server, moved_server) %>% summarize(count = n()) %>% arrange(desc(count))
|
|
|
|
|
|
is_mastodon <- arrow::read_feather(here("data/nodeinfo-2024-01-31.feather")) %>%
|
|
filter(str_detect(data_string, '"name": "mastodon"')) %>% select(server)
|
|
|
|
activity <-
|
|
arrow::read_feather(here("data/scratch/activity.feather"),
|
|
col_select = c("server", "logins")) %>%
|
|
arrange(desc(logins))
|
|
|
|
metadata <- arrow::read_feather(here("data/scratch/metadata.feather")) %>%
|
|
drop_na() %>%
|
|
filter(server %in% is_mastodon$server) %>%
|
|
unnest(languages) %>%
|
|
distinct(server, .keep_all=T) #%>% filter(user_count >= 10)
|
|
```
|
|
|
|
```{r}
|
|
#| execute: false
|
|
# library(statnet)
|
|
# n = data.frame(
|
|
# from = c(2,3,4,4),
|
|
# to = c(1,1,1,3),
|
|
# count = c(1,2,3,1)
|
|
# )
|
|
#
|
|
# e <- network(n, matrix.type = "edgelist")
|
|
# e %v% "size" <- c(1,2,1,100)
|
|
#
|
|
# m2 <-
|
|
# ergm(
|
|
# e ~ sum +
|
|
# nodeocov("size", form = "sum") +
|
|
# diff("size", dir="h-t", pow = 1, form = "sum"),
|
|
# response = "count",
|
|
# reference = ~ Binomial(3),
|
|
# control=control.ergm(parallel=4, parallel.type="PSOCK")
|
|
# )
|
|
#
|
|
# summary(m2)
|
|
```
|
|
|
|
|
|
|
|
```{r}
|
|
build_network <- function(move_counts, metadata, activity_data) {
|
|
edgelist <- move_counts %>%
|
|
filter(server %in% activity$server) %>%
|
|
filter(moved_server %in% activity$server) %>%
|
|
filter(server %in% metadata$server) %>%
|
|
filter(moved_server %in% metadata$server) %>% #filter(server %in% jm$domain) %>% filter(moved_server %in% jm$domain) %>%
|
|
select(server, moved_server, count) #%>% slice_sample(n=500)
|
|
|
|
edgeNet <- network(edgelist, matrix.type = "edgelist", directed=TRUE, loops=FALSE, multiple=FALSE)
|
|
|
|
activity_data <- left_join((as_tibble(edgeNet %v% 'vertex.names') %>% rename(server = value)), activity, by="server")
|
|
metadata_data <- left_join((as_tibble(edgeNet %v% 'vertex.names') %>% rename(server = value)), metadata, by="server")
|
|
|
|
edgeNet %v% "last_week_users" <- log1p(activity_data$logins)
|
|
edgeNet %v% "accounts" <- log(metadata_data$user_count)
|
|
edgeNet %v% "single_user" <- (metadata_data$user_count == 1)
|
|
edgeNet %v% "dead_server" <- (activity_data$logins == 0)
|
|
edgeNet %v% "jm" <- edgeNet %v% 'vertex.names' %in% jm$domain
|
|
edgeNet %v% "approval_required" <- metadata_data$approval_required
|
|
edgeNet %v% "registrations" <- metadata_data$registrations
|
|
edgeNet %v% "description" <- metadata_data$description
|
|
edgeNet %v% "invites_enabled" <- metadata_data$invites_enabled
|
|
edgeNet %v% "language" <- metadata$languages
|
|
return(edgeNet)
|
|
}
|
|
|
|
run_network <- function(network) {
|
|
model <-
|
|
ergm(
|
|
network ~ sum + nonzero +
|
|
#diff("last_week_users", dir="h-t", pow = 0, form = "sum") +
|
|
diff("accounts", dir="h-t", pow = 0, form = "sum") + # Do people move to smaller servers?
|
|
nodeocov("accounts", form = "sum") + # Do servers with more accounts have more outflow?
|
|
nodeifactor("registrations", form = "sum") + # Do servers with open registration get more inflow?
|
|
nodematch("language", form = "sum"),
|
|
response = "count",
|
|
reference = ~ Binomial(5),
|
|
control = control.ergm(MCMLE.maxit = 100, MCMC.effectiveSize = 50)
|
|
)
|
|
return(model)
|
|
}
|
|
|
|
move_counts.early <- inner_join(moved_accounts, accounts, by=c("server"="server", "account"="username")) %>%
|
|
filter(server %in% is_mastodon$server) %>%
|
|
filter(moved_server %in% is_mastodon$server) %>%
|
|
filter(created_at >= as.Date("2022-04-01")) %>%
|
|
filter(created_at < as.Date("2022-05-01")) %>%
|
|
filter(server != moved_server) %>%
|
|
group_by(server, moved_server) %>% summarize(count = n()) %>% arrange(desc(count)) %>%
|
|
ungroup()
|
|
|
|
move_counts.late <- inner_join(moved_accounts, accounts, by=c("moved_server"="server", "moved_acct"="username")) %>%
|
|
filter(server %in% is_mastodon$server) %>%
|
|
filter(moved_server %in% is_mastodon$server) %>%
|
|
filter(created_at >= as.Date("2023-10-20")) %>%
|
|
filter(server != moved_server) %>%
|
|
group_by(server, moved_server) %>% summarize(count = n()) %>% arrange(desc(count)) %>%
|
|
ungroup()
|
|
|
|
move_counts.late2 <- inner_join(moved_accounts, accounts, by=c("server"="server", "account"="username")) %>%
|
|
filter(server %in% is_mastodon$server) %>%
|
|
filter(moved_server %in% is_mastodon$server) %>%
|
|
filter(created_at >= as.Date("2023-10-20")) %>%
|
|
filter(server != moved_server) %>%
|
|
group_by(server, moved_server) %>% summarize(count = n()) %>% arrange(desc(count)) %>%
|
|
ungroup()
|
|
|
|
edgeNet.early <- build_network(move_counts.early, metadata, activity_data)
|
|
edgeNet.late <- build_network(move_counts.late2, metadata, activity_data)
|
|
```
|
|
|
|
|
|
```{r}
|
|
library(GGally)
|
|
|
|
edgeNet.early %e% "lcount" <- as.integer(log(edgeNet.early %e% "count")) + 1
|
|
edgeNet.early %e% "pcount" <- edgeNet.early %e% "lcount" / max(edgeNet.early %e% "lcount")
|
|
|
|
ggnet2(
|
|
edgeNet.early,
|
|
edge.size="lcount",
|
|
color = "black",
|
|
node.alpha = 0.75,
|
|
edge.alpha = 0.25,
|
|
edge.label.alpha = "pcount",#0.5,
|
|
max_size = 10,
|
|
size = "indegree",
|
|
#size = "degree",
|
|
#mode = "target",
|
|
#size.min = 1.1,
|
|
arrow.gap = 0.01, arrow.size = 5, arrow.type = "open"
|
|
) + coord_equal() + guides(color = FALSE, size = FALSE)
|
|
```
|
|
|
|
```{r}
|
|
library(GGally)
|
|
|
|
edgeNet.late %e% "lcount" <- as.integer(log(edgeNet.late %e% "count")) + 1
|
|
edgeNet.late %e% "pcount" <- edgeNet.late %e% "lcount" / max(edgeNet.late %e% "lcount")
|
|
|
|
ggnet2(
|
|
edgeNet.late,
|
|
edge.size="lcount",
|
|
color = "black",
|
|
node.alpha = 0.75,
|
|
edge.alpha = 0.25,
|
|
edge.label.alpha = "pcount",#0.5,
|
|
max_size = 10,
|
|
size = "indegree",
|
|
#size = "degree",
|
|
#mode = "target",
|
|
#size.min = 1.1,
|
|
arrow.gap = 0.01, arrow.size = 5, arrow.type = "open"
|
|
) + coord_equal() + guides(color = FALSE, size = FALSE)
|
|
```
|
|
|
|
|
|
```{r}
|
|
#| label: tbl-ergm
|
|
#| tbl-cap: ERGM model output
|
|
#| cache: true
|
|
#model.early <- run_network(edgeNet.early)
|
|
#model.late <- run_network(edgeNet.late)
|
|
#save(model.early, file = here("data/scratch/ergm-model-early.rda"))
|
|
#save(model.late, file = here("data/scratch/ergm-model-late.rda"))
|
|
load(file = here("data/scratch/ergm-model-early.rda"))
|
|
load(file = here("data/scratch/ergm-model-late.rda"))
|
|
library(kableExtra)
|
|
modelsummary(
|
|
list("Coef." = model.early, "Std.Error" = model.early, "Coef." = model.late, "Std.Error" = model.late),
|
|
estimate = c("{estimate}", "{stars}{std.error}", "{estimate}", "{stars}{std.error}"),
|
|
statistic = NULL,
|
|
gof_omit = ".*",
|
|
coef_rename = c(
|
|
"sum" = "(Sum)",
|
|
"diff.sum0.h-t.accounts" = "Smaller server",
|
|
"nodeocov.sum.accounts" = "Server size (outgoing)",
|
|
"nodeifactor.sum.registrations.TRUE" = "Open registrations (incoming)",
|
|
"nodematch.sum.language" = "Languages match"
|
|
),
|
|
align="lrrrr",
|
|
stars = c('*' = .05, '**' = 0.01, '***' = .001),
|
|
) %>%
|
|
add_header_above(c(" " = 1, "Model A" = 2, "Model B" = 2))
|
|
```
|
|
|
|
```{r}
|
|
#| eval: false
|
|
jm2023 <- as_tibble(fromJSON(here("data/joinmastodon-2023-08-25.json")))
|
|
|
|
move_counts.stable <- inner_join(moved_accounts, accounts, by=c("moved_server"="server", "moved_acct"="username")) %>%
|
|
filter(server %in% is_mastodon$server) %>%
|
|
filter(moved_server %in% is_mastodon$server) %>%
|
|
filter(created_at >= as.Date("2023-05-01")) %>%
|
|
filter(created_at < as.Date("2023-08-01")) %>%
|
|
filter(server != moved_server) %>%
|
|
group_by(server, moved_server) %>% summarize(count = n()) %>% arrange(desc(count)) %>%
|
|
ungroup() %>%
|
|
filter(server %in% jm2023$domain) %>%
|
|
filter(moved_server %in% jm2023$domain)
|
|
|
|
run_network2 <- function(network) {
|
|
model <-
|
|
ergm(
|
|
network ~ sum + nonzero +
|
|
#diff("last_week_users", dir="h-t", pow = 0, form = "sum") +
|
|
nodeocovar(center=TRUE,transform="sqrt") +
|
|
diff("accounts", dir="h-t", pow = 0, form = "sum") + # Do people move to smaller servers?
|
|
nodeocov("accounts", form = "sum") + # Do servers with more accounts have more outflow?
|
|
nodeifactor("registrations", form = "sum") + # Do servers with open registration get more inflow?
|
|
nodematch("language", form = "sum"),
|
|
response = "count",
|
|
reference = ~ Binomial(5),
|
|
control = control.ergm(MCMLE.maxit = 100, MCMC.effectiveSize = 50)
|
|
)
|
|
return(model)
|
|
}
|
|
|
|
|
|
move_counts.stable
|
|
edgeNet.stable <- build_network(move_counts.stable, metadata, activity_data)
|
|
model.stable <- run_network2(edgeNet.stable)
|
|
save(model.stable, file = here("data/scratch/ergm-model-stable.rda"))
|
|
```
|