library(tidyverse)
library(arrow)
library(here)
library(patchwork)
library(scales)

#' Compact variant of `theme_bw()` with scaled-down text elements.
#'
#' @param base_size Base font size handed to `theme_bw()`. Each overridden
#'   text element is sized as a fixed fraction of this value.
#' @return A ggplot2 theme object.
theme_bw_small_labels <- function(base_size = 9) {
  # `%+replace%` swaps each listed element wholesale, so any property not
  # given here (colour, margin, ...) is inherited from the parent element
  # instead of from theme_bw()'s own setting for that element.
  theme_bw(base_size = base_size) %+replace%
    theme(
      plot.title = element_text(size = base_size * 0.8),
      plot.subtitle = element_text(size = base_size * 0.75),
      plot.caption = element_text(size = base_size * 0.7),
      axis.title = element_text(size = base_size * 0.9),
      axis.text = element_text(size = base_size * 0.8),
      legend.title = element_text(size = base_size * 0.9),
      legend.text = element_text(size = base_size * 0.8)
    )
}

#' Load the account table, optionally filtered down to the study sample.
#'
#' Reads a fixed set of columns from `data/scratch/all_accounts.feather`
#' (path resolved with `here()`).
#'
#' @param filt If `FALSE`, return the table exactly as read. If `TRUE`
#'   (default), apply the filters below and add the derived columns
#'   `active`, `last_status_at_censored` and `active_time`.
#' @return A data frame of accounts.
load_accounts <- function(filt = TRUE) {
  accounts_unfilt <- arrow::read_feather(
    here("data/scratch/all_accounts.feather"),
    col_select = c(
      "server", "username", "created_at", "last_status_at",
      "statuses_count", "has_moved", "bot", "suspended",
      "following_count", "followers_count", "locked", "noindex",
      "group", "discoverable", "limited"
    )
  )

  if (!filt) {
    return(accounts_unfilt)
  }

  # Cut-offs are explicit UTC instants. Comparing a POSIXct column against a
  # bare string parses the string in the machine's local timezone, which made
  # the sample depend on where the script ran and disagreed with the UTC
  # censoring timestamp.
  window_start <- as.POSIXct("2020-08-14", tz = "UTC")
  censor_at <- as.POSIXct("2024-01-01", tz = "UTC")

  accounts_unfilt %>%
    filter(!bot) %>%
    # TODO: what's going on here?
    filter(!is.na(last_status_at)) %>%
    # mutate(limited = replace_na(limited, FALSE)) %>%
    mutate(suspended = replace_na(suspended, FALSE)) %>%
    # NOTE(review): with the replace_na() above commented out, rows whose
    # `limited` is NA are dropped here, because filter() discards NA
    # conditions. Confirm that is intended.
    filter(!limited) %>%
    # sanity check
    filter(!suspended) %>%
    filter(!has_moved) %>%
    filter(created_at >= window_start) %>%
    filter(created_at < censor_at) %>%
    # We don't want accounts that were created and then immediately stopped
    # being active
    filter(statuses_count >= 1) %>%
    filter(last_status_at > created_at) %>%
    mutate(active = last_status_at >= censor_at) %>%
    # if_else() keeps the POSIXct class; base ifelse() strips it and would
    # leave this column as bare seconds since the epoch.
    mutate(
      last_status_at_censored = if_else(active, censor_at, last_status_at)
    ) %>%
    # NOTE(review): this uses the uncensored `last_status_at`, whereas
    # account_timeline_plot() measures from the censored value. Confirm
    # which is wanted.
    mutate(active_time = difftime(last_status_at, created_at, units = "days"))
}

#' Two-panel weekly timeline of account creation.
#'
#' Reads `data/scratch/joinmastodon.feather` and
#' `data/scratch/all_accounts.feather`, groups accounts by the week they were
#' created, and returns a patchwork figure: the top panel shows per-week
#' proportions (JoinMastodon server, Active, Moved, Is mastodon.social), the
#' bottom panel shows the number of accounts created per week with dashed
#' vertical markers at four fixed dates.
#'
#' @return A patchwork object with the two panels stacked in one column.
account_timeline_plot <- function() {
  jm_servers <- arrow::read_feather(here("data/scratch/joinmastodon.feather"))

  accounts_unfilt <- arrow::read_feather(
    here("data/scratch/all_accounts.feather"),
    col_select = c(
      "server", "username", "created_at", "last_status_at",
      "statuses_count", "has_moved", "bot", "suspended",
      "following_count", "followers_count", "locked", "noindex",
      "group", "discoverable"
    )
  )

  # Explicit UTC instants; see the comment in load_accounts() about why bare
  # date strings are avoided.
  window_start <- as.POSIXct("2020-10-01", tz = "UTC")
  window_end <- as.POSIXct("2023-08-15", tz = "UTC")
  censor_at <- as.POSIXct("2024-01-01", tz = "UTC")

  accounts <- accounts_unfilt %>%
    filter(!bot) %>%
    # TODO: what's going on here?
    filter(!is.na(last_status_at)) %>%
    mutate(suspended = replace_na(suspended, FALSE)) %>%
    # sanity check
    filter(created_at >= window_start) %>%
    # filter(created_at < "2024-01-01") %>%
    filter(created_at < window_end) %>%
    # We don't want accounts that were created and then immediately stopped
    # being active
    filter(statuses_count >= 1) %>%
    filter(last_status_at >= created_at) %>%
    mutate(active = last_status_at >= censor_at) %>%
    # Censor the last-status time at the cut-off. if_else() keeps the column
    # a POSIXct so the difftime() below works on every R version; base
    # ifelse() would hand difftime() a bare number.
    mutate(last_status_at = if_else(active, censor_at, last_status_at)) %>%
    mutate(active_time = difftime(last_status_at, created_at, units = "days"))
  # Moved accounts are deliberately kept: they are counted below.

  acc_data <- accounts %>%
    mutate(created_week = lubridate::floor_date(created_at, unit = "week")) %>%
    mutate(active_now = active) %>%
    # From here on `active` means "censored activity span of at least 91 days".
    mutate(active = active_time >= 91) %>%
    mutate("Is mastodon.social" = server == "mastodon.social") %>%
    mutate(is_jm = server %in% jm_servers$domain) %>%
    group_by(created_week) %>%
    # NOTE(review): Active / active_now subtract every moved and suspended
    # account from the numerator, whether or not that account was counted as
    # active, and subtract an account twice if it is both moved and
    # suspended. Left as in the original; confirm the intended definition.
    summarize(
      `JoinMastodon Server` = sum(is_jm) / n(),
      `Is mastodon.social` = sum(`Is mastodon.social`) / n(),
      Suspended = sum(suspended) / n(),
      Active = (sum(active) - sum(has_moved) - sum(suspended)) /
        (n() - sum(has_moved) - sum(suspended)),
      active_now = (sum(active_now) - sum(has_moved) - sum(suspended)) /
        (n() - sum(has_moved) - sum(suspended)),
      Moved = sum(has_moved) / n(),
      count = n()
    ) %>%
    # "Suspended" is computed above but not plotted.
    pivot_longer(
      cols = c("JoinMastodon Server", "Active", "Moved", "Is mastodon.social"),
      names_to = "Measure",
      values_to = "value"
    )

  # Top panel: weekly proportions; x-axis decorations are hidden because the
  # bottom panel carries the same date axis.
  p1 <- acc_data %>%
    ggplot(aes(x = as.Date(created_week), group = 1)) +
    geom_line(aes(y = value, group = Measure, color = Measure)) +
    geom_point(aes(y = value, color = Measure), size = 0.7) +
    scale_y_continuous(limits = c(0, 1.0)) +
    labs(y = "Proportion") +
    scale_x_date(date_labels = "%Y-%U", date_breaks = "8 week") +
    theme_bw_small_labels() +
    theme(
      axis.title.x = element_blank(),
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank()
    )

  # Dates marked with dashed vertical lines in the bottom panel.
  # https://twitter.com/elonmusk/status/1675187969420828672
  # NOTE(review): the link above sat next to the 2022-12-15 marker in the
  # original; confirm which date it documents.
  event_markers <- tibble(
    event_date = as.Date(
      c("2022-10-27", "2022-04-14", "2022-12-15", "2023-07-01")
    )
  )

  # Bottom panel: accounts created per week. The markers get their own data
  # so each line is drawn once rather than once per row of the plot data.
  p2 <- acc_data %>%
    distinct(created_week, count) %>%
    ggplot(aes(x = as.Date(created_week), y = count)) +
    geom_col(fill = "black") +
    geom_vline(
      data = event_markers,
      aes(xintercept = as.numeric(event_date)),
      linetype = "dashed",
      color = "black"
    ) +
    # scale_y_continuous(limits = c(0, max(acc_data$count) + 100000)) +
    scale_y_continuous(labels = scales::comma) +
    labs(y = "Count", x = "Created Week") +
    theme_bw_small_labels() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    scale_x_date(date_labels = "%Y-%U", date_breaks = "8 week")

  p1 + p2 + plot_layout(ncol = 1, guides = "collect")
}