junior-sheer/notebooks/survival.qmd
2024-02-27 14:08:11 -06:00

79 lines
2.4 KiB
Plaintext

---
title: "Model for Survival"
engine: knitr
---
```{r}
#| label: setup
library(here)
library(survival)
source(here("code/helpers.R"))
options(arrow.skip_nul = TRUE)
a <- load_accounts() %>%
filter(!has_moved) %>%
filter(locked == FALSE) %>%
anti_join(., arrow::read_feather(here("data/scratch/individual_moved_accounts.feather")), by=c("username"="moved_acct", "server"="moved_server")) %>%
inner_join(arrow::read_feather(here("data/scratch/metadata.feather")), by="server") %>%
filter(created_at >= "2023-11-01") %>%
filter(created_at <= "2023-11-30") %>%
#inner_join(activity, by="server") %>%
filter(created_at < last_status_at) %>%
mutate(active_time = as.integer(active_time)) %>%
mutate(status = ifelse(active, 0, 1)) %>%
mutate(jm = server %in% arrow::read_feather(here("data/scratch/joinmastodon.feather"))$domain) %>%
mutate(follows_someone = following_count > 0) %>%
mutate(has_a_follower = followers_count > 0)
```
```{r}
server_summary <- a %>%
group_by(server) %>%
summarize(cohort_size = n(), .groups = 'drop')
sel_a <- a %>%
mutate(is_ms = server == "mastodon.social") %>%
ungroup() %>%
inner_join(server_summary, by = "server")
cx <- sel_a %>%
mutate(c_size = as.factor(as.integer(log10(cohort_size)))) %>%
mutate(follows_someone = following_count >= 14) %>%
filter(followers_count > 0) %>%
filter(following_count > 0) %>%
coxph(Surv(active_time_weeks, status) ~ cluster(server) + jm + is_ms, data = ., x=TRUE, robust = T) # cluster(server)
cz <- cox.zph(cx)
#plot(cz)
cx
cz
```
```{r}
#| label: fig-survival-plot
#| fig-cap: Survival plot for accounts created in November 2023. Accounts created on mastodon.social had a higher hazard than accounts created on other servers.
#| fig-width: 6.75
#| fig-height: 3
library(ggsurvfit)
sel_a %>%
mutate(c_size = as.factor(as.integer(log10(cohort_size)))) %>%
mutate(no_followers = !has_a_follower) %>%
filter(followers_count > 0) %>%
filter(following_count > 0) %>%
filter(statuses_count > 0) %>%
mutate(small = cohort_size < 10) %>%
mutate(large = cohort_size > 100) %>%
survfit2(Surv(active_time_weeks, status) ~ jm + strata(is_ms), data = .) %>% # cluster(server)
ggsurvfit() +
add_confidence_interval() +
#scale_y_continuous(limits = c(0, 1)) +
labs(
y = "Overall survival probability",
x = "Time (days)",
) +
theme_bw_small_labels() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
```