--- title: "Moved Accounts" --- ```{r} library(tidyverse) library(jsonlite) library(arrow) library(statnet) library(modelsummary) library(here) options(arrow.skip_nul = TRUE) jm <- as_tibble(fromJSON(here("data/joinmastodon.json"))) moved_accounts <- arrow::read_feather(here("data/scratch/individual_moved_accounts.feather")) %>% filter(server != "mastodon.nfriedly.com") %>% filter(server != "vivaldi.social") source(here("code/helpers.R")) accounts <- load_accounts(filt = FALSE) jm_move_counts <- inner_join(moved_accounts, accounts, by=c("moved_server"="server", "moved_acct"="username")) %>% filter(created_at >= as.Date("2023-06-01")) %>% filter(server %in% jm$domain) %>% filter(moved_server %in% jm$domain) %>% filter(server != moved_server) %>% group_by(server, moved_server) %>% summarize(count = n()) %>% arrange(desc(count)) is_mastodon <- arrow::read_feather(here("data/nodeinfo-2024-01-31.feather")) %>% filter(str_detect(data_string, '"name": "mastodon"')) %>% select(server) activity <- arrow::read_feather(here("data/scratch/activity.feather"), col_select = c("server", "logins")) %>% arrange(desc(logins)) metadata <- arrow::read_feather(here("data/scratch/metadata.feather")) %>% drop_na() %>% filter(server %in% is_mastodon$server) %>% unnest(languages) %>% distinct(server, .keep_all=T) #%>% filter(user_count >= 10) ``` ```{r} #| execute: false # library(statnet) # n = data.frame( # from = c(2,3,4,4), # to = c(1,1,1,3), # count = c(1,2,3,1) # ) # # e <- network(n, matrix.type = "edgelist") # e %v% "size" <- c(1,2,1,100) # # m2 <- # ergm( # e ~ sum + # nodeocov("size", form = "sum") + # diff("size", dir="h-t", pow = 1, form = "sum"), # response = "count", # reference = ~ Binomial(3), # control=control.ergm(parallel=4, parallel.type="PSOCK") # ) # # summary(m2) ``` ```{r} build_network <- function(move_counts, metadata, activity_data) { edgelist <- move_counts %>% filter(server %in% activity$server) %>% filter(moved_server %in% activity$server) %>% filter(server %in% metadata$server) %>% filter(moved_server %in% metadata$server) %>% #filter(server %in% jm$domain) %>% filter(moved_server %in% jm$domain) %>% select(server, moved_server, count) #%>% slice_sample(n=500) edgeNet <- network(edgelist, matrix.type = "edgelist", directed=TRUE, loops=FALSE, multiple=FALSE) activity_data <- left_join((as_tibble(edgeNet %v% 'vertex.names') %>% rename(server = value)), activity, by="server") metadata_data <- left_join((as_tibble(edgeNet %v% 'vertex.names') %>% rename(server = value)), metadata, by="server") edgeNet %v% "last_week_users" <- log1p(activity_data$logins) edgeNet %v% "accounts" <- log(metadata_data$user_count) edgeNet %v% "single_user" <- (metadata_data$user_count == 1) edgeNet %v% "dead_server" <- (activity_data$logins == 0) edgeNet %v% "jm" <- edgeNet %v% 'vertex.names' %in% jm$domain edgeNet %v% "approval_required" <- metadata_data$approval_required edgeNet %v% "registrations" <- metadata_data$registrations edgeNet %v% "description" <- metadata_data$description edgeNet %v% "invites_enabled" <- metadata_data$invites_enabled edgeNet %v% "language" <- metadata$languages return(edgeNet) } run_network <- function(network) { model <- ergm( network ~ sum + nonzero + #diff("last_week_users", dir="h-t", pow = 0, form = "sum") + diff("accounts", dir="h-t", pow = 0, form = "sum") + # Do people move to smaller servers? nodeocov("accounts", form = "sum") + # Do servers with more accounts have more outflow? nodeifactor("registrations", form = "sum") + # Do servers with open registration get more inflow? nodematch("language", form = "sum"), response = "count", reference = ~ Binomial(5), control = control.ergm(MCMLE.maxit = 100, MCMC.effectiveSize = 50) ) return(model) } move_counts.early <- inner_join(moved_accounts, accounts, by=c("server"="server", "account"="username")) %>% filter(server %in% is_mastodon$server) %>% filter(moved_server %in% is_mastodon$server) %>% filter(created_at >= as.Date("2022-04-01")) %>% filter(created_at < as.Date("2022-05-01")) %>% filter(server != moved_server) %>% group_by(server, moved_server) %>% summarize(count = n()) %>% arrange(desc(count)) %>% ungroup() move_counts.late <- inner_join(moved_accounts, accounts, by=c("moved_server"="server", "moved_acct"="username")) %>% filter(server %in% is_mastodon$server) %>% filter(moved_server %in% is_mastodon$server) %>% filter(created_at >= as.Date("2023-10-20")) %>% filter(server != moved_server) %>% group_by(server, moved_server) %>% summarize(count = n()) %>% arrange(desc(count)) %>% ungroup() move_counts.late2 <- inner_join(moved_accounts, accounts, by=c("server"="server", "account"="username")) %>% filter(server %in% is_mastodon$server) %>% filter(moved_server %in% is_mastodon$server) %>% filter(created_at >= as.Date("2023-10-20")) %>% filter(server != moved_server) %>% group_by(server, moved_server) %>% summarize(count = n()) %>% arrange(desc(count)) %>% ungroup() edgeNet.early <- build_network(move_counts.early, metadata, activity_data) edgeNet.late <- build_network(move_counts.late2, metadata, activity_data) ``` ```{r} library(GGally) edgeNet.early %e% "lcount" <- as.integer(log(edgeNet.early %e% "count")) + 1 edgeNet.early %e% "pcount" <- edgeNet.early %e% "lcount" / max(edgeNet.early %e% "lcount") ggnet2( edgeNet.early, edge.size="lcount", color = "black", node.alpha = 0.75, edge.alpha = 0.25, edge.label.alpha = "pcount",#0.5, max_size = 10, size = "indegree", #size = "degree", #mode = "target", #size.min = 1.1, arrow.gap = 0.01, arrow.size = 5, arrow.type = "open" ) + coord_equal() + guides(color = FALSE, size = FALSE) ``` ```{r} library(GGally) edgeNet.late %e% "lcount" <- as.integer(log(edgeNet.late %e% "count")) + 1 edgeNet.late %e% "pcount" <- edgeNet.late %e% "lcount" / max(edgeNet.late %e% "lcount") ggnet2( edgeNet.late, edge.size="lcount", color = "black", node.alpha = 0.75, edge.alpha = 0.25, edge.label.alpha = "pcount",#0.5, max_size = 10, size = "indegree", #size = "degree", #mode = "target", #size.min = 1.1, arrow.gap = 0.01, arrow.size = 5, arrow.type = "open" ) + coord_equal() + guides(color = FALSE, size = FALSE) ``` ```{r} #| label: tbl-ergm #| tbl-cap: ERGM model output #| cache: true #model.early <- run_network(edgeNet.early) #model.late <- run_network(edgeNet.late) #save(model.early, file = here("data/scratch/ergm-model-early.rda")) #save(model.late, file = here("data/scratch/ergm-model-late.rda")) load(file = here("data/scratch/ergm-model-early.rda")) load(file = here("data/scratch/ergm-model-late.rda")) library(kableExtra) modelsummary( list("Coef." = model.early, "Std.Error" = model.early, "Coef." = model.late, "Std.Error" = model.late), estimate = c("{estimate}", "{stars}{std.error}", "{estimate}", "{stars}{std.error}"), statistic = NULL, gof_omit = ".*", coef_rename = c( "sum" = "(Sum)", "diff.sum0.h-t.accounts" = "Smaller server", "nodeocov.sum.accounts" = "Server size (outgoing)", "nodeifactor.sum.registrations.TRUE" = "Open registrations (incoming)", "nodematch.sum.language" = "Languages match" ), align="lrrrr", stars = c('*' = .05, '**' = 0.01, '***' = .001), ) %>% add_header_above(c(" " = 1, "Model A" = 2, "Model B" = 2)) ``` ```{r} #| eval: false jm2023 <- as_tibble(fromJSON(here("data/joinmastodon-2023-08-25.json"))) move_counts.stable <- inner_join(moved_accounts, accounts, by=c("moved_server"="server", "moved_acct"="username")) %>% filter(server %in% is_mastodon$server) %>% filter(moved_server %in% is_mastodon$server) %>% filter(created_at >= as.Date("2023-05-01")) %>% filter(created_at < as.Date("2023-08-01")) %>% filter(server != moved_server) %>% group_by(server, moved_server) %>% summarize(count = n()) %>% arrange(desc(count)) %>% ungroup() %>% filter(server %in% jm2023$domain) %>% filter(moved_server %in% jm2023$domain) run_network2 <- function(network) { model <- ergm( network ~ sum + nonzero + #diff("last_week_users", dir="h-t", pow = 0, form = "sum") + nodeocovar(center=TRUE,transform="sqrt") + diff("accounts", dir="h-t", pow = 0, form = "sum") + # Do people move to smaller servers? nodeocov("accounts", form = "sum") + # Do servers with more accounts have more outflow? nodeifactor("registrations", form = "sum") + # Do servers with open registration get more inflow? nodematch("language", form = "sum"), response = "count", reference = ~ Binomial(5), control = control.ergm(MCMLE.maxit = 100, MCMC.effectiveSize = 50) ) return(model) } move_counts.stable edgeNet.stable <- build_network(move_counts.stable, metadata, activity_data) model.stable <- run_network2(edgeNet.stable) save(model.stable, file = here("data/scratch/ergm-model-stable.rda")) ```