# Source listing metadata: 131 lines, 5.5 KiB, R
library(tidyverse)
|
|
library(arrow)
|
|
library(here)
|
|
library(patchwork)
|
|
library(scales)
|
|
|
|
# Black-and-white ggplot2 theme whose text elements are sized as fixed
# fractions of `base_size`.
#
# base_size: base font size handed to theme_bw(); every override below is a
#            multiple of it.
#
# Returns the theme_bw() theme with the listed text elements swapped out via
# `%+replace%` (the elements are replaced wholesale rather than merged).
theme_bw_small_labels <- function(base_size = 9) {
  # Text element scaled relative to the base size.
  scaled_text <- function(ratio) {
    element_text(size = base_size * ratio)
  }

  label_overrides <- theme(
    plot.title = scaled_text(0.8),
    plot.subtitle = scaled_text(0.75),
    plot.caption = scaled_text(0.7),
    axis.title = scaled_text(0.9),
    axis.text = scaled_text(0.8),
    legend.title = scaled_text(0.9),
    legend.text = scaled_text(0.8)
  )

  theme_bw(base_size = base_size) %+replace% label_overrides
}
|
|
|
|
# Load the account table from data/scratch/all_accounts.feather (resolved
# with here()), optionally restricted to the analysis sample.
#
# filt: if TRUE (default) apply the filters and derived columns below;
#       if FALSE return the selected columns exactly as read.
#
# Returns a data frame. When `filt` is TRUE it additionally carries:
#   active                  - last_status_at is on/after 2024-01-01
#   last_status_at_censored - last_status_at capped at 2024-01-01 00:00 UTC
#                             for accounts flagged `active`
#   active_time             - difftime (days) between created_at and the
#                             *uncensored* last_status_at
load_accounts <- function(filt = TRUE) {
  accounts_unfilt <- arrow::read_feather(
    here("data/scratch/all_accounts.feather"),
    col_select = c(
      "server", "username", "created_at", "last_status_at",
      "statuses_count", "has_moved", "bot", "suspended",
      "following_count", "followers_count", "locked",
      "noindex", "group", "discoverable", "limited"
    )
  )

  if (!filt) {
    return(accounts_unfilt)
  }

  # Timestamp used to right-censor accounts that were still posting at the
  # end of the observation window.
  censor_at <- lubridate::ymd_hms("2024-01-01 00:00:00", tz = "UTC")

  accounts_unfilt %>%
    filter(!bot) %>%
    # TODO: what's going on here?
    filter(!is.na(last_status_at)) %>%
    mutate(suspended = replace_na(suspended, FALSE)) %>%
    # NOTE: `limited` is not NA-filled (unlike `suspended`), so filter()
    # also drops rows where `limited` is NA.
    filter(!limited) %>%
    # sanity check
    filter(!suspended) %>%
    filter(!has_moved) %>%
    filter(created_at >= "2020-08-14") %>%
    filter(created_at < "2024-01-01") %>%
    # We don't want accounts that were created and then immediately stopped
    # being active
    filter(statuses_count >= 1) %>%
    filter(last_status_at > created_at) %>%
    mutate(active = last_status_at >= "2024-01-01") %>%
    # if_else() rather than base ifelse(): ifelse() strips the date-time
    # class and would leave bare numeric seconds in this column.
    # Assumes last_status_at is a date-time column -- TODO confirm.
    mutate(
      last_status_at_censored = dplyr::if_else(
        active, censor_at, last_status_at
      )
    ) %>%
    mutate(active_time = difftime(last_status_at, created_at, units = "days"))
}
|
|
|
|
# Build a two-panel figure of account creation over time.
#
# Reads data/scratch/joinmastodon.feather and data/scratch/all_accounts.feather
# (both resolved with here()). Accounts are bucketed by the week of
# `created_at`; the top panel shows per-week proportions (JoinMastodon server,
# mastodon.social, active, moved) and the bottom panel the per-week account
# count with dashed vertical markers at four fixed dates.
#
# Returns the patchwork composition of the two panels stacked in one column
# with legends collected.
account_timeline_plot <- function() {
  jm <- arrow::read_feather(here("data/scratch/joinmastodon.feather"))
  accounts_unfilt <- arrow::read_feather(
    here("data/scratch/all_accounts.feather"),
    col_select = c(
      "server", "username", "created_at", "last_status_at",
      "statuses_count", "has_moved", "bot", "suspended",
      "following_count", "followers_count", "locked",
      "noindex", "group", "discoverable"
    )
  )

  # Timestamp used to right-censor accounts still posting on/after 2024-01-01.
  censor_at <- lubridate::ymd_hms("2024-01-01 00:00:00", tz = "UTC")

  # Moved and suspended accounts are kept on purpose: the weekly summary
  # below needs them for the Moved / Suspended shares and for the adjusted
  # denominators.
  accounts <- accounts_unfilt %>%
    filter(!bot) %>%
    # TODO: what's going on here?
    filter(!is.na(last_status_at)) %>%
    mutate(suspended = replace_na(suspended, FALSE)) %>%
    # sanity check
    filter(created_at >= "2020-10-01") %>%
    filter(created_at < "2023-08-15") %>%
    # We don't want accounts that were created and then immediately stopped
    # being active
    filter(statuses_count >= 1) %>%
    filter(last_status_at >= created_at) %>%
    mutate(active = last_status_at >= "2024-01-01") %>%
    # if_else() rather than base ifelse(): ifelse() strips the date-time
    # class, leaving bare numerics that difftime() cannot convert on R
    # versions where as.POSIXct() on a numeric requires `origin`.
    # Assumes last_status_at is a date-time column -- TODO confirm.
    mutate(
      last_status_at = dplyr::if_else(active, censor_at, last_status_at)
    ) %>%
    mutate(active_time = difftime(last_status_at, created_at, units = "days"))

  acc_data <- accounts %>%
    # Namespaced like the ymd_hms() call above, so this does not depend on
    # lubridate being attached.
    mutate(created_week = lubridate::floor_date(created_at, unit = "week")) %>%
    mutate(active_now = active) %>%
    # From here on `active` means "(censored) activity span of >= 91 days".
    mutate(active = active_time >= 91) %>%
    mutate(`Is mastodon.social` = server == "mastodon.social") %>%
    mutate(jm = server %in% jm$domain) %>%
    group_by(created_week) %>%
    summarize(
      `JoinMastodon Server` = sum(jm) / n(),
      `Is mastodon.social` = sum(`Is mastodon.social`) / n(),
      Suspended = sum(suspended) / n(),
      # Moved and suspended accounts are subtracted from both numerator and
      # denominator.
      Active = (sum(active) - sum(has_moved) - sum(suspended)) /
        (n() - sum(has_moved) - sum(suspended)),
      active_now = (sum(active_now) - sum(has_moved) - sum(suspended)) /
        (n() - sum(has_moved) - sum(suspended)),
      Moved = sum(has_moved) / n(),
      count = n()
    ) %>%
    # "Suspended" and "active_now" are intentionally left out of the plotted
    # measures; they stay behind as ordinary columns.
    pivot_longer(
      cols = c("JoinMastodon Server", "Active", "Moved", "Is mastodon.social"),
      names_to = "Measure",
      values_to = "value"
    )

  # Both panels share the same x axis: year-week labels every 8 weeks.
  week_axis <- function() {
    scale_x_date(labels = date_format("%Y-%U"), breaks = "8 week")
  }

  # Dates marked with dashed vertical lines in the count panel.
  event_dates <- as.Date(c(
    "2022-04-14",
    "2022-10-27",
    "2022-12-15",
    # https://twitter.com/elonmusk/status/1675187969420828672
    "2023-07-01"
  ))

  p1 <- acc_data %>%
    ggplot(aes(x = as.Date(created_week), group = 1)) +
    geom_line(aes(y = value, group = Measure, color = Measure)) +
    geom_point(aes(y = value, color = Measure), size = 0.7) +
    scale_y_continuous(limits = c(0, 1.0)) +
    labs(y = "Proportion") +
    week_axis() +
    theme_bw_small_labels() +
    # The x axis is drawn only on the bottom panel.
    theme(
      axis.title.x = element_blank(),
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank()
    )

  p2 <- acc_data %>%
    distinct(created_week, count) %>%
    ggplot(aes(x = as.Date(created_week), y = count)) +
    geom_bar(stat = "identity", fill = "black") +
    # One layer with a parameter vector instead of four aes()-mapped
    # constants, which drew one overlapping line per data row.
    # as.numeric() is kept from the original calls.
    geom_vline(
      xintercept = as.numeric(event_dates),
      linetype = "dashed", color = "black"
    ) +
    scale_y_continuous(labels = scales::comma) +
    labs(y = "Count", x = "Created Week") +
    theme_bw_small_labels() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    week_axis()

  p1 + p2 + plot_layout(ncol = 1, guides = "collect")
}