```{r} #| echo: false #| output: false #| warning: false #| label: push-pull-prep library(arrow) library(tidyverse) library(tsibble) library(fable) library(lmtest) library(jsonlite) library(here) source(here("code/helpers.R")) accounts <- load_accounts() jm <- arrow::read_feather(here("data/scratch/joinmastodon.feather")) ``` ```{r} #| label: prep-break-one-raw-counts server_list <- c( "mastodon.social", "mastodon.online" ) early.jm_servers <- as_tibble(fromJSON(here("data/joinmastodon-2020-09-18.json")))$domain early.day_counts <- accounts %>% filter(created_at < "2021-09-01") %>% mutate(created_day = as.Date(floor_date(created_at, unit = "day"))) %>% mutate(server_code = ifelse(server %in% early.jm_servers, "joinmastodon", "other")) %>% mutate(server_code = ifelse(server == "mastodon.social", "mastodon.social", server_code)) %>% mutate(server = ifelse(server == "mastodon.online", "mastodon.online", server_code)) %>% group_by(created_day, server) %>% summarize(count = n(), .groups = "drop") %>% as_tsibble(., key=server, index=created_day) %>% fill_gaps(count=0) %>% mutate(first_open = ((created_day >= "2020-09-18") & (created_day < "2020-11-01"))) %>% #mutate(second_open = ((created_day > "2020-11-02") & (created_day < "2020-11-05"))) %>% mutate(third_open = (created_day >= "2021-04-17")) %>% mutate(open = (first_open | third_open)) early.data_plot <- early.day_counts %>% mutate(created_week = as.Date(floor_date(created_day, unit = "week"))) %>% ggplot(aes(x = created_day, y=count)) + geom_rect(data = (early.day_counts %>% filter(open)), aes(xmin = created_day - 0.5, xmax = created_day + 0.5, ymin = 0, ymax = Inf), fill = "lightblue", alpha = 0.3) + # Adjust color and transparency as needed geom_bar(stat="identity") + facet_wrap(~ server, ncol=1, strip.position = "left") + #, scales="free_y") + scale_x_date(expand = c(0, 0), date_labels = "%B %Y") + scale_y_log10() + labs( title = "Open registration periods on mastodon.social (August 2020 - August 2021)", x = "Account Created Date", y = "Count" ) + theme_bw_small_labels() ``` ```{r} #| label: table-early-open-coefs if (knitr::is_latex_output()) { format <- "latex" } else { format <- "html" } model_data <- early.day_counts %>% mutate(count = log1p(count)) %>% ungroup %>% arrange(created_day) %>% mutate(day = row_number()) fit <- model_data %>% model(arima = ARIMA(count ~ open + day + open:day + fourier(period=7, K=2) + pdq(2,0,0) + PDQ(0,0,0,period=7))) early.table <- fit %>% tidy %>% mutate(p.value = scales::pvalue(p.value)) %>% pivot_wider(names_from=server, values_from = c(estimate, std.error, statistic, p.value)) %>% select(-c(.model)) %>% select(term, estimate_mastodon.online, p.value_mastodon.online, estimate_mastodon.social, p.value_mastodon.social, estimate_joinmastodon, p.value_joinmastodon, estimate_other, p.value_other ) %>% #select(term, starts_with("estimate"), starts_with("p.value")) #%>% knitr::kable( ., format = format, col.names = c("Term", "mastodon.online", "", "mastodon.social", "", "joinmastodon", "", "other", ""), digits = 4, align = c("l", "r", "r", "r", "r", "r", "r", "r", "r"), booktabs = T ) ``` ```{r} #| label: prep-break-two-raw-counts email.jm_servers <- as_tibble(fromJSON(here("data/joinmastodon-2023-08-25.json")))$domain email.day_counts <- accounts %>% filter(created_at > "2022-07-01") %>% filter(created_at < "2022-10-26") %>% mutate(created_day = as.Date(floor_date(created_at, unit = "day"))) %>% mutate(server_code = ifelse(server %in% email.jm_servers, "joinmastodon", "other")) %>% mutate(server = ifelse(server == "mastodon.social", "mastodon.social", server_code)) %>% #mutate(server = server_code) %>% #filter(server != "other") %>% group_by(created_day, server) %>% summarize(count = n(), .groups = "drop") %>% as_tsibble(., key = server, index = created_day) %>% fill_gaps(count = 0) %>% mutate(open = ((created_day < "2022-08-13") | (created_day > "2022-10-03"))) email.data_plot <- email.day_counts %>% #filter(server != "other") %>% mutate(created_week = as.Date(floor_date(created_day, unit = "week"))) %>% ggplot(aes(x = created_day, y = count)) + geom_rect( data = (email.day_counts %>% filter(open)), aes( xmin = created_day - 0.5, xmax = created_day + 0.5, ymin = 0, ymax = Inf ), fill = "lightblue", alpha = 0.3 ) + # Adjust color and transparency as needed geom_bar(stat = "identity") + facet_wrap( ~ server, ncol = 1, strip.position = "left") + #, scales="free_y") + scale_x_date(expand = c(0, 0), date_labels = "%B %Y") + labs( title = "Closure of mastodon.social (2022)", x = "Account Created Date", y = "Count" ) + theme_bw_small_labels() ``` ```{r} #| label: email-open-coefs if (knitr::is_latex_output()) { format <- "latex" } else { format <- "html" } model_data <- email.day_counts %>% mutate(count = log1p(count)) %>% ungroup %>% arrange(created_day) %>% mutate(day = row_number()) fit <- model_data %>% model(arima = ARIMA(count ~ open + day + open:day + fourier(period=7, K=2) + pdq(2,0,0) + PDQ(0,0,0,period=7))) email.table <- fit %>% tidy %>% mutate(p.value = scales::pvalue(p.value)) %>% pivot_wider(names_from=server, values_from = c(estimate, std.error, statistic, p.value)) %>% select(-c(.model)) %>% select(term, estimate_mastodon.social, p.value_mastodon.social, estimate_joinmastodon, p.value_joinmastodon, estimate_other, p.value_other ) %>% knitr::kable( ., format = format, col.names = c("Term", "mastodon.social", "", "joinmastodon", "", "other", ""), digits = 4, align = c("l", "r", "r", "r", "r", "r", "r"), booktabs = T ) ``` ```{r} #| label: prep-break-three-raw-counts late.jm_servers <- as_tibble(fromJSON(here("data/joinmastodon-2023-08-25.json")))$domain last.day_counts <- accounts %>% filter(created_at > "2022-12-01") %>% filter(created_at < "2023-05-01") %>% mutate(created_day = as.Date(floor_date(created_at, unit = "day"))) %>% mutate(server_code = ifelse(server %in% late.jm_servers, "joinmastodon", "other")) %>% mutate(server_code = ifelse(server == "mastodon.social", "mastodon.social", server_code)) %>% mutate(server = server_code) %>% #filter(server != "other") %>% group_by(created_day, server) %>% summarize(count = n(), .groups = "drop") %>% as_tsibble(., key=server, index=created_day) %>% fill_gaps(count=0) %>% mutate(open = (created_day > "2023-02-08") | ((created_day > "2022-12-10") & (created_day < "2022-12-17"))) last.data_plot <- last.day_counts %>% #filter(server != "other") %>% mutate(created_week = as.Date(floor_date(created_day, unit = "week"))) %>% ggplot(aes(x = created_day, y=count)) + geom_rect(data = (last.day_counts %>% filter(open)), aes(xmin = created_day - 0.5, xmax = created_day + 0.5, ymin = 0, ymax = Inf), fill = "lightblue", alpha = 0.3) + # Adjust color and transparency as needed geom_bar(stat="identity") + facet_wrap(~ server, ncol=1, strip.position = "left") + #, scales="free_y") + scale_x_date(expand = c(0, 0), date_labels = "%B %Y") + #scale_y_log10() + labs( x = "Account Created Date", y = "Count" ) + theme_bw_small_labels() #library(patchwork) #early.data_plot + email.data_plot + last.data_plot + plot_layout(ncol = 1) ``` ```{r} #| label: late-open-coefs if (knitr::is_latex_output()) { format <- "latex" } else { format <- "html" } model_data <- last.day_counts %>% mutate(count = log1p(count)) %>% ungroup %>% arrange(created_day) %>% mutate(day = row_number()) fit <- model_data %>% model(arima = ARIMA(count ~ open + day + open:day + fourier(period=7, K=2) + pdq(2,0,0) + PDQ(0,0,0,period=7))) last.table <- fit %>% tidy %>% mutate(p.value = scales::pvalue(p.value)) %>% pivot_wider(names_from=server, values_from = c(estimate, std.error, statistic, p.value)) %>% select(-c(.model)) %>% select(term, estimate_mastodon.social, p.value_mastodon.social, estimate_joinmastodon, p.value_joinmastodon, estimate_other, p.value_other ) %>% knitr::kable( ., format = format, col.names = c("Term", "mastodon.social", "", "joinmastodon", "", "other", ""), digits = 4, align = c("l", "r", "r", "r", "r", "r", "r"), booktabs = T ) ``` ```{r} #| eval: false library(sandwich) model.poisson <- early.day_counts %>% filter(server == "mastodon.online") %>% filter(created_day > "2020-08-01") %>% filter(created_day < "2021-09-01") %>% ungroup %>% arrange(created_day) %>% mutate(day = row_number()) %>% glm(count ~ day*open, data=., family=poisson) v <- sqrt(diag(vcovHC(model.poisson, type = "HC0"))) coeftest(model.poisson, vcovHC(model.poisson, type="HC0")) ``` ::: {.panel-tabset} #### Early ```{r} #| label: fig-break-one-raw-counts #| fig-height: 4 #| fig-width: 6.75 #| fig-env: figure* #| fig-pos: p early.data_plot ``` #### Email ```{r} #| label: fig-break-two-raw-counts #| fig-height: 3.5 #| fig-width: 6.75 #| fig-env: figure* #| fig-pos: p email.data_plot ``` #### Last ```{r} #| label: fig-break-three-raw-counts #| fig-height: 3.5 #| fig-width: 6.75 #| fig-env: figure* #| fig-pos: p last.data_plot ``` ::: ::: {.panel-tabset} #### Early ::: {#tbl-early .column-body} ```{r} early.table ``` Caption ::: #### Email ::: {#tbl-email .column-body} ```{r} email.table ``` ::: #### Last ::: {#tbl-last .column-body} ```{r} last.table ``` ::: ::: ```{r} #| label: fig-mastodon-online-forecast #| fig-cap: "Historical signup counts for mastodon.online and two alternative forecasts based on whether or not mastoodn.social is accepting signups." #| fig-height: 2.7 #| fig-width: 6.75 #| exec: false #| fig-env: figure* model_data <- early.day_counts %>% mutate(count = log1p(count)) %>% ungroup %>% arrange(created_day) %>% mutate(day = row_number()) fit <- model_data %>% model(arima = ARIMA(count ~ open + day + open:day + fourier(period=7, K=2) + pdq(2,0,0) + PDQ(0,0,0,period=7))) f_server <- "mastodon.online" new_data <- tsibble( created_day = max(model_data$created_day) + 1:100, day = max(model_data$day) + 1:100, server = f_server #"" ) model.obj <- fit %>% filter(server == f_server) %>% select(arima) %>% pull %>% first forecast.open <- model.obj %>% forecast(new_data=(new_data %>% add_column(open = TRUE))) %>% hilo %>% unpack_hilo(`95%`) forecast.closed <- model.obj %>% forecast(new_data=(new_data %>% add_column(open = FALSE))) %>% hilo %>% unpack_hilo(`95%`) hist_data <- as_tibble(model_data) %>% filter(server == f_server) %>% select(created_day, server, count, open) %>% rename(count_mean=count) bind_rows( as_tibble(forecast.open), as_tibble(forecast.closed) ) %>% rename(count_mean=.mean) %>% ggplot(aes(x=created_day, y=count_mean)) + geom_line(aes(color=open, group=open)) + #, linetype="dashed") + geom_ribbon(aes(ymin=`95%_lower`, ymax=`95%_upper`, group=open, fill=open), alpha=0.25) + geom_line(aes(x=created_day, y=count_mean), data=hist_data) + # , color=open, group=open geom_rect(data = (hist_data %>% filter(open)), aes(xmin = created_day - 0.5, xmax = created_day + 0.5, ymin = 0, ymax = Inf), fill = "lightblue", alpha = 0.3) + # Adjust color and transparency as needed labs( x = "Date", y = "Accounts created (log1p)", color = "Signups open on mastodon.social", fill = "Signups open on mastodon.social" ) + scale_x_date(expand = c(0, 0), date_labels = "%B %Y") + theme_bw_small_labels() + theme(legend.position="top", axis.title.x=element_blank()) ```