79 lines
2.4 KiB
Plaintext
79 lines
2.4 KiB
Plaintext
---
|
|
title: "Model for Survival"
|
|
engine: knitr
|
|
---
|
|
|
|
```{r}
|
|
#| label: setup
|
|
library(here)
|
|
library(survival)
|
|
|
|
source(here("code/helpers.R"))
|
|
options(arrow.skip_nul = TRUE)
|
|
|
|
a <- load_accounts() %>%
|
|
filter(!has_moved) %>%
|
|
filter(locked == FALSE) %>%
|
|
anti_join(., arrow::read_feather(here("data/scratch/individual_moved_accounts.feather")), by=c("username"="moved_acct", "server"="moved_server")) %>%
|
|
inner_join(arrow::read_feather(here("data/scratch/metadata.feather")), by="server") %>%
|
|
filter(created_at >= "2023-11-01") %>%
|
|
filter(created_at <= "2023-11-30") %>%
|
|
#inner_join(activity, by="server") %>%
|
|
filter(created_at < last_status_at) %>%
|
|
mutate(active_time = as.integer(active_time)) %>%
|
|
mutate(status = ifelse(active, 0, 1)) %>%
|
|
mutate(jm = server %in% arrow::read_feather(here("data/scratch/joinmastodon.feather"))$domain) %>%
|
|
mutate(follows_someone = following_count > 0) %>%
|
|
mutate(has_a_follower = followers_count > 0)
|
|
```
|
|
|
|
```{r}
|
|
|
|
server_summary <- a %>%
|
|
group_by(server) %>%
|
|
summarize(cohort_size = n(), .groups = 'drop')
|
|
|
|
sel_a <- a %>%
|
|
mutate(is_ms = server == "mastodon.social") %>%
|
|
ungroup() %>%
|
|
inner_join(server_summary, by = "server")
|
|
|
|
cx <- sel_a %>%
|
|
mutate(c_size = as.factor(as.integer(log10(cohort_size)))) %>%
|
|
mutate(follows_someone = following_count >= 14) %>%
|
|
filter(followers_count > 0) %>%
|
|
filter(following_count > 0) %>%
|
|
coxph(Surv(active_time_weeks, status) ~ cluster(server) + jm + is_ms, data = ., x=TRUE, robust = T) # cluster(server)
|
|
|
|
cz <- cox.zph(cx)
|
|
#plot(cz)
|
|
cx
|
|
cz
|
|
```
|
|
|
|
|
|
```{r}
|
|
#| label: fig-survival-plot
|
|
#| fig-cap: Survival plot for accounts created in November 2023. Accounts created on mastodon.social had a higher hazard than accounts created on other servers.
|
|
#| fig-width: 6.75
|
|
#| fig-height: 3
|
|
library(ggsurvfit)
|
|
sel_a %>%
|
|
mutate(c_size = as.factor(as.integer(log10(cohort_size)))) %>%
|
|
mutate(no_followers = !has_a_follower) %>%
|
|
filter(followers_count > 0) %>%
|
|
filter(following_count > 0) %>%
|
|
filter(statuses_count > 0) %>%
|
|
mutate(small = cohort_size < 10) %>%
|
|
mutate(large = cohort_size > 100) %>%
|
|
survfit2(Surv(active_time_weeks, status) ~ jm + strata(is_ms), data = .) %>% # cluster(server)
|
|
ggsurvfit() +
|
|
add_confidence_interval() +
|
|
#scale_y_continuous(limits = c(0, 1)) +
|
|
labs(
|
|
y = "Overall survival probability",
|
|
x = "Time (days)",
|
|
) +
|
|
theme_bw_small_labels() +
|
|
theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
``` |