--- title: "Model for Survival" engine: knitr --- ```{r} #| label: setup library(here) library(survival) source(here("code/helpers.R")) options(arrow.skip_nul = TRUE) a <- load_accounts() %>% filter(!has_moved) %>% filter(locked == FALSE) %>% anti_join(., arrow::read_feather(here("data/scratch/individual_moved_accounts.feather")), by=c("username"="moved_acct", "server"="moved_server")) %>% inner_join(arrow::read_feather(here("data/scratch/metadata.feather")), by="server") %>% filter(created_at >= "2023-11-01") %>% filter(created_at <= "2023-11-30") %>% #inner_join(activity, by="server") %>% filter(created_at < last_status_at) %>% mutate(active_time = as.integer(active_time)) %>% mutate(status = ifelse(active, 0, 1)) %>% mutate(jm = server %in% arrow::read_feather(here("data/scratch/joinmastodon.feather"))$domain) %>% mutate(follows_someone = following_count > 0) %>% mutate(has_a_follower = followers_count > 0) ``` ```{r} server_summary <- a %>% group_by(server) %>% summarize(cohort_size = n(), .groups = 'drop') sel_a <- a %>% mutate(is_ms = server == "mastodon.social") %>% ungroup() %>% inner_join(server_summary, by = "server") cx <- sel_a %>% mutate(c_size = as.factor(as.integer(log10(cohort_size)))) %>% mutate(follows_someone = following_count >= 14) %>% filter(followers_count > 0) %>% filter(following_count > 0) %>% coxph(Surv(active_time_weeks, status) ~ cluster(server) + jm + is_ms, data = ., x=TRUE, robust = T) # cluster(server) cz <- cox.zph(cx) #plot(cz) cx cz ``` ```{r} #| label: fig-survival-plot #| fig-cap: Survival plot for accounts created in November 2023. Accounts created on mastodon.social had a higher hazard than accounts created on other servers. #| fig-width: 6.75 #| fig-height: 3 library(ggsurvfit) sel_a %>% mutate(c_size = as.factor(as.integer(log10(cohort_size)))) %>% mutate(no_followers = !has_a_follower) %>% filter(followers_count > 0) %>% filter(following_count > 0) %>% filter(statuses_count > 0) %>% mutate(small = cohort_size < 10) %>% mutate(large = cohort_size > 100) %>% survfit2(Surv(active_time_weeks, status) ~ jm + strata(is_ms), data = .) %>% # cluster(server) ggsurvfit() + add_confidence_interval() + #scale_y_continuous(limits = c(0, 1)) + labs( y = "Overall survival probability", x = "Time (days)", ) + theme_bw_small_labels() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) ```