# Survival analysis of Mastodon account activity for the May–June 2023 cohort.
# Builds the account-level dataset, fits Cox PH / mixed-effects Cox models on
# time-to-last-status, and draws a Kaplan–Meier plot by server "generality".

library(here)
library(survival)
library(tidyverse)
library(ggsurvfit)
library(coxme)
library(jsonlite)

source(here("code/helpers.R"))

options(arrow.skip_nul = TRUE)

# Observation window (days): accounts still active after this are censored.
active_period <- 91

# Account-level dataset: unmoved, unlocked accounts created in May–June 2023
# that posted at least once after creation.
a <- load_accounts() %>%
  filter(!has_moved) %>%
  filter(!locked) %>%
  # Drop accounts that individually moved servers (not flagged by has_moved).
  anti_join(
    arrow::read_feather(here("data/scratch/individual_moved_accounts.feather")),
    by = c("username" = "moved_acct", "server" = "moved_server")
  ) %>%
  inner_join(
    arrow::read_feather(here("data/scratch/metadata.feather")),
    by = "server"
  ) %>%
  filter(created_at > "2023-04-30") %>%
  filter(created_at <= "2023-06-30") %>%
  filter(created_at < last_status_at) %>%
  # jm: server was listed on joinmastodon.org at the snapshot date.
  mutate(
    jm = server %in%
      as_tibble(jsonlite::fromJSON(here("data/joinmastodon-2023-08-25.json")))$domain
  ) %>%
  mutate(active_time = difftime(last_status_at, created_at, units = "days")) %>%
  # Right-censor activity time at the observation window.
  mutate(
    active_time_censored = ifelse(active_time > active_period, active_period, active_time)
  ) %>%
  # Event indicator: 0 = alive (censored), 1 = dead (went inactive).
  mutate(status = ifelse(active_time > active_period, 0, 1))
# Previously considered: %>% filter(followers_count > 0) %>% filter(following_count > 0)

# Quick interactive sanity check of the survival columns.
a %>% select(status, created_at, last_status_at, active_time_censored)

activity <- arrow::read_feather(
  here("data/scratch/activity.feather"),
  col_select = c("server", "logins")
) %>%
  arrange(desc(logins))

# Cohort size per server.
server_summary <- a %>%
  group_by(server) %>%
  summarize(cohort_size = n(), .groups = "drop")

general_servers <- c(
  "mastodon.social", "mstdn.social", "mastodon.world", "mas.to",
  "mastodon.online", "mastodonapp.uk", "universeodon.com", "masto.ai",
  "c.im", "social.vivaldi.net", "mstdn.party", "ohai.social"
)

# > 100 cohort size + strength > .75
# NOTE: wrapped path in here() for consistency with every other data read,
# so the script does not depend on the working directory.
server_centrality <- arrow::read_ipc_file(
  here("data/scratch/server_centrality.feather")
) %>%
  rename(generality = strength)

# Analysis dataset: account covariates joined with server-level features.
sel_a <- a %>%
  mutate(is_ms = server == "mastodon.social") %>%
  mutate(general = server %in% general_servers) %>%
  ungroup() %>%
  inner_join(server_summary, by = "server") %>%
  left_join(server_centrality, by = "server") %>%
  mutate(small_server = user_count <= 100) %>%
  mutate(large_server = user_count >= 1000) %>%
  # filter(jm) %>%
  # group: 0 = neither, 1 = jm or general, 2 = both.
  mutate(group = as.integer(jm) + as.integer(general)) %>%
  # mutate(huge_server = user_count >= 3162.28) %>%
  # filter(statuses_count >= 10) %>%
  filter(!noindex)
# %>% filter(cohort_size >= 2) %>% filter(user_count > 100)

# Cox PH model with robust (cluster-on-server) standard errors.
cx <- sel_a %>%
  coxph(
    Surv(active_time_censored, status) ~ general,
    data = ., x = TRUE, robust = TRUE, cluster = server
  )

# Mixed-effects Cox model with a per-server random intercept.
cxme <- sel_a %>%
  coxme(
    Surv(active_time_censored, status) ~ factor(group) + small_server + (1 | server),
    data = .
  )

# Test of the proportional-hazards assumption for cx.
cz <- cox.zph(cx)

# Kaplan–Meier survival curves by server generality.
plot_km <- sel_a %>%
  mutate(id = paste(username, server, sep = "@")) %>%
  survfit2(
    Surv(active_time_censored, status) ~ general,
    data = ., id = id, cluster = server, robust = TRUE
  ) %>%
  ggsurvfit() +
  add_confidence_interval() +
  labs(
    y = "Overall survival probability",
    x = "Time (days)"
  ) +
  scale_fill_discrete(name = "Group", labels = c("Other JoinMastodon", "General")) +
  scale_color_discrete(name = "Group", labels = c("Other JoinMastodon", "General")) +
  theme_bw_small_labels() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom"
  )

# logistic regression on generality
# FIX: `active` was never defined anywhere in the pipeline, so the original
# glm() call errored at runtime. Derived here as "survived past the cutoff"
# (status == 0). NOTE(review): confirm this matches the intended definition.
logit <- sel_a %>%
  mutate(active = status == 0) %>%
  glm(active ~ generality, data = ., family = "binomial")