98 lines
3.3 KiB
R
98 lines
3.3 KiB
R
library(here)
|
|
library(survival)
|
|
library(tidyverse)
|
|
library(ggsurvfit)
|
|
library(coxme)
|
|
library(jsonlite)
|
|
|
|
source(here("code/helpers.R"))
|
|
options(arrow.skip_nul = TRUE)
|
|
|
|
active_period <- 91
|
|
|
|
a <- load_accounts() %>%
|
|
filter(!has_moved) %>%
|
|
filter(locked == FALSE) %>%
|
|
anti_join(., arrow::read_feather(here("data/scratch/individual_moved_accounts.feather")), by=c("username"="moved_acct", "server"="moved_server")) %>%
|
|
inner_join(arrow::read_feather(here("data/scratch/metadata.feather")), by="server") %>%
|
|
filter(created_at > "2023-04-30") %>%
|
|
filter(created_at <= "2023-06-30") %>%
|
|
filter(created_at < last_status_at) %>%
|
|
mutate(jm = server %in% as_tibble(jsonlite::fromJSON(here("data/joinmastodon-2023-08-25.json")))$domain) %>%
|
|
mutate(active_time = difftime(last_status_at, created_at, units="days")) %>%
|
|
mutate(active_time_censored = ifelse(active_time > active_period, active_period, active_time)) %>%
|
|
#mutate(active_time = difftime(last_status_at, created_at, units="days")) %>%
|
|
#0=alive, 1=dead.
|
|
mutate(status = ifelse(active_time > active_period, 0, 1))# %>% filter(followers_count > 0) %>% filter(following_count > 0)
|
|
|
|
a %>% select(status, created_at, last_status_at, active_time_censored)
|
|
|
|
activity <- arrow::read_feather(
|
|
here("data/scratch/activity.feather"),
|
|
col_select = c("server", "logins")
|
|
) %>%
|
|
arrange(desc(logins))
|
|
|
|
server_summary <- a %>%
|
|
group_by(server) %>%
|
|
summarize(cohort_size = n(), .groups = "drop")
|
|
|
|
general_servers <- c(
|
|
"mastodon.social",
|
|
"mstdn.social",
|
|
"mastodon.world",
|
|
"mas.to",
|
|
"mastodon.online",
|
|
"mastodonapp.uk",
|
|
"universeodon.com",
|
|
"masto.ai",
|
|
"c.im",
|
|
"social.vivaldi.net",
|
|
"mstdn.party",
|
|
"ohai.social"
|
|
) # > 100 cohort size + strength > .75
|
|
|
|
server_centrality <- arrow::read_ipc_file("data/scratch/server_centrality.feather") %>%
|
|
rename(generality = strength)
|
|
|
|
sel_a <- a %>%
|
|
mutate(is_ms = server == "mastodon.social") %>%
|
|
mutate(general = server %in% general_servers) %>%
|
|
ungroup() %>%
|
|
inner_join(server_summary, by = "server") %>%
|
|
left_join(server_centrality, by = "server") %>%
|
|
mutate(small_server = user_count <= 100) %>%
|
|
mutate(large_server = user_count >= 1000) %>%
|
|
#filter(jm) %>%
|
|
mutate(group = as.integer(jm) + as.integer(general)) %>%
|
|
#mutate(huge_server = user_count >= 3162.28) %>%
|
|
#filter(statuses_count >= 10) %>%
|
|
filter(!noindex)# %>% filter(cohort_size >= 2) #%>% filter(user_count > 100)
|
|
|
|
cx <- sel_a %>% coxph(Surv(active_time_censored, status) ~ general, data = ., x=TRUE, robust = T, cluster=server)
|
|
|
|
cxme <- sel_a %>% coxme(Surv(active_time_censored, status) ~ factor(group) + small_server + (1|server), data = .)
|
|
cz <- cox.zph(cx)
|
|
|
|
plot_km <- sel_a %>%
|
|
mutate(id = paste(username, server, sep = "@")) %>%
|
|
survfit2(
|
|
Surv(active_time_censored, status) ~ general,
|
|
data = ., id = id,
|
|
cluster = server,
|
|
robust = TRUE
|
|
) %>%
|
|
ggsurvfit() +
|
|
add_confidence_interval() +
|
|
labs(
|
|
y = "Overall survival probability",
|
|
x = "Time (days)",
|
|
) +
|
|
scale_fill_discrete(name = "Group", labels = c("Other JoinMastodon", "General")) +
|
|
scale_color_discrete(name = "Group", labels = c("Other JoinMastodon", "General")) +
|
|
theme_bw_small_labels() +
|
|
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position="bottom")
|
|
|
|
# logistic regression on generality
|
|
logit <- sel_a %>%
|
|
glm(active ~ generality, data = ., family = "binomial") |