# tets

```{r}
#| label: fig-moved-accounts
#| fig-height: 4
#| eval: false
moved_accounts <- arrow::read_feather("data/scratch/moved_accounts.feather")
popular_servers <- arrow::read_feather("data/scratch/popular_servers.feather")

# Net account flow per server: accounts moving in minus accounts moving out.
# A full join keeps servers that appear on only one side of a move (only as
# an origin or only as a destination); the missing side is filled with 0.
server_movement_data <- full_join(
  moved_accounts %>%
    group_by(server) %>%
    summarize(out_count = sum(count)),
  moved_accounts %>%
    group_by(moved_server) %>%
    summarize(in_count = sum(count)) %>%
    rename(server = moved_server),
  by = "server"
) %>%
  replace_na(list(out_count = 0, in_count = 0)) %>%
  mutate(diff = in_count - out_count) %>%
  arrange(diff) %>%
  # Attach each server's user count; servers absent from popular_servers
  # get NA here.
  left_join(popular_servers, by = "server") %>%
  rename(user_count = count) %>%
  arrange(desc(user_count))

# Net flow against server size (log-scaled x axis).
server_movement_data %>%
  ggplot(aes(x = user_count, y = diff)) +
  geom_point() +
  scale_x_log10() +
  theme_bw_small_labels()
```

If there were no relationship, we would expect these jumps to be random with respect to server size.

```{r}
popular_servers <- arrow::read_feather("data/scratch/popular_servers.feather")
moved_accounts <- arrow::read_feather("data/scratch/moved_accounts.feather") %>%
  # Remove loops
  filter(server != moved_server)
activity <- arrow::read_feather(
  "data/scratch/activity.feather",
  col_select = c("server", "logins")
) %>%
  arrange(desc(logins))
# Server sizes on a log10 scale; count >= 1 keeps log10() finite.
popular_and_large_servers <- popular_servers %>%
  filter(count >= 1) %>%
  mutate(count = log10(count))
jm <- arrow::read_feather("data/scratch/joinmastodon.feather")
# Keep only moves whose origin and destination both have a known size.
ma <- moved_accounts %>%
  filter(server %in% popular_and_large_servers$server) %>%
  filter(moved_server %in% popular_and_large_servers$server)

# Construct network
edgeNet <- network(ma, matrix.type = "edgelist")
# Vertex attribute: log10 user count, looked up by vertex name.
edgeNet %v% "user_count" <- left_join(
  as_tibble(edgeNet %v% "vertex.names") %>% rename(server = value),
  popular_and_large_servers,
  by = "server"
) %>%
  select(count) %>%
  unlist()
# Vertex attribute: whether the vertex name appears in jm$domain.
edgeNet %v% "in_jm" <- as_tibble(edgeNet %v% "vertex.names") %>%
  mutate(in_jm = value %in% jm$domain) %>%
  select(in_jm) %>%
  unlist()
```

We construct an exponential family random graph model (ERGM) where nodes represent servers and weighted directed edges represent the number of accounts that moved between servers.

<!-- NOTE(review): the fitted coefficient is named "diff.sum.t-h.user_count", which suggests a tail-minus-head difference; confirm that the direction of the beta_1 term below matches the intended meaning of i and j. -->

$$
\begin{aligned}
\text{Sum}_{i,j} = \ & \beta_0 + \beta_1 (\log_{10}(\text{user count}_j) - \log_{10}(\text{user count}_i)) + \\
& \beta_2 (\log_{10}(\text{user count}_i) + \log_{10}(\text{user count}_j)) + \\
& \beta_3 \, \mathbf{1}[\text{both } i \text{ and } j \text{ in JoinMastodon}] + \\
& \beta_4 \, \mathbf{1}[\text{neither } i \text{ nor } j \text{ in JoinMastodon}]
\end{aligned}
$$

```{r}
#| label: ergm-model
#| cache: true
# Valued ERGM on the `count` edge attribute.
# NOTE(review): the Binomial(3) reference presumably restricts modelled edge
# values to 0..3 -- confirm that `count` never exceeds this.
m1 <- ergm(
  edgeNet ~ sum +
    diff("user_count", pow = 1, form = "sum") +
    nodecov("user_count", form = "sum") +
    nodematch("in_jm", diff = TRUE, form = "sum"),
  response = "count",
  reference = ~ Binomial(3),
  control = control.ergm(parallel = 4, parallel.type = "PSOCK")
)
save(m1, file = "data/scratch/ergm-model.rda")
```

```{r}
#| label: tag-ergm-result
#| output: asis
# load() restores `m1` into the environment; the value assigned here is only
# the character vector of restored object names, not the model itself.
ergm_model <- load("data/scratch/ergm-model.rda")
modelsummary(
  m1,
  escape = FALSE,
  coef_rename = c(
    "sum" = "$\\beta_0$ Intercept",
    "diff.sum.t-h.user_count" = "$\\beta_1$ User Count Difference",
    "nodecov.sum.user_count" = "$\\beta_2$ User Count (Node Covariate)",
    "nodematch.sum.in_jm.TRUE" = "$\\beta_3$ In JoinMastodon (Both True)",
    "nodematch.sum.in_jm.FALSE" = "$\\beta_4$ In JoinMastodon (Both False)"
  )
)
```

We find a strong preference for accounts to move from large servers to smaller servers.
```{python} #| eval: false #| include: false import random def simulate_account_moves(origin: str, servers: dict, n: int): server_list = list(set(servers.keys()) - {origin}) weights = [servers[x] for x in server_list] return pl.DataFrame({ "simulation": list(range(n)), "server": [origin] * n, "moved_server": random.choices(server_list, weights=weights, k=n) }) simulations = pl.concat([simulate_account_moves(row["server"], {x["server"]: x["count"] for x in popular_servers.iter_rows(named=True)}, 1000) for row in maccounts.iter_rows(named=True)]) m_counts = maccounts.join(popular_servers, how="inner", on="server").rename({"count": "origin_count"}).join(popular_servers.rename({"server": "moved_server"}), how="inner", on="moved_server").rename({"count": "target_count"}) sim_counts = simulations.join(popular_servers, how="inner", on="server").rename({"count": "origin_count"}).join(popular_servers.rename({"server": "moved_server"}), how="inner", on="moved_server").rename({"count": "target_count"}) ```