410 lines
12 KiB
Plaintext
410 lines
12 KiB
Plaintext
```{r}
|
|
#| echo: false
|
|
#| output: false
|
|
#| warning: false
|
|
#| label: push-pull-prep
|
|
library(arrow)
|
|
library(tidyverse)
|
|
library(tsibble)
|
|
library(fable)
|
|
library(lmtest)
|
|
library(jsonlite)
|
|
library(here)
|
|
|
|
source(here("code/helpers.R"))
|
|
accounts <- load_accounts()
|
|
jm <- arrow::read_feather(here("data/scratch/joinmastodon.feather"))
|
|
```
|
|
|
|
```{r}
|
|
#| label: prep-break-one-raw-counts
|
|
server_list <- c(
|
|
"mastodon.social", "mastodon.online"
|
|
)
|
|
|
|
early.jm_servers <- as_tibble(fromJSON(here("data/joinmastodon-2020-09-18.json")))$domain
|
|
|
|
early.day_counts <- accounts %>%
|
|
filter(created_at < "2021-09-01") %>%
|
|
mutate(created_day = as.Date(floor_date(created_at, unit = "day"))) %>%
|
|
mutate(server_code = ifelse(server %in% early.jm_servers, "joinmastodon", "other")) %>%
|
|
mutate(server_code = ifelse(server == "mastodon.social", "mastodon.social", server_code)) %>%
|
|
mutate(server = ifelse(server == "mastodon.online", "mastodon.online", server_code)) %>%
|
|
group_by(created_day, server) %>%
|
|
summarize(count = n(), .groups = "drop") %>%
|
|
as_tsibble(., key=server, index=created_day) %>%
|
|
fill_gaps(count=0) %>%
|
|
mutate(first_open = ((created_day >= "2020-09-18") & (created_day < "2020-11-01"))) %>%
|
|
#mutate(second_open = ((created_day > "2020-11-02") & (created_day < "2020-11-05"))) %>%
|
|
mutate(third_open = (created_day >= "2021-04-17")) %>%
|
|
mutate(open = (first_open | third_open))
|
|
|
|
early.data_plot <- early.day_counts %>%
|
|
mutate(created_week = as.Date(floor_date(created_day, unit = "week"))) %>%
|
|
ggplot(aes(x = created_day, y=count)) +
|
|
geom_rect(data = (early.day_counts %>% filter(open)),
|
|
aes(xmin = created_day - 0.5, xmax = created_day + 0.5, ymin = 0, ymax = Inf),
|
|
fill = "lightblue", alpha = 0.3) + # Adjust color and transparency as needed
|
|
geom_bar(stat="identity") +
|
|
facet_wrap(~ server, ncol=1, strip.position = "left") + #, scales="free_y") +
|
|
scale_x_date(expand = c(0, 0), date_labels = "%B %Y") +
|
|
scale_y_log10() +
|
|
labs(
|
|
title = "Open registration periods on mastodon.social (August 2020 - August 2021)",
|
|
x = "Account Created Date",
|
|
y = "Count"
|
|
) +
|
|
theme_bw_small_labels()
|
|
```
|
|
|
|
```{r}
|
|
#| label: table-early-open-coefs
|
|
|
|
if (knitr::is_latex_output()) {
|
|
format <- "latex"
|
|
} else {
|
|
format <- "html"
|
|
}
|
|
|
|
model_data <- early.day_counts %>%
|
|
mutate(count = log1p(count)) %>%
|
|
ungroup %>%
|
|
arrange(created_day) %>%
|
|
mutate(day = row_number())
|
|
|
|
fit <- model_data %>%
|
|
model(arima = ARIMA(count ~ open + day + open:day + fourier(period=7, K=2) + pdq(2,0,0) + PDQ(0,0,0,period=7)))
|
|
|
|
early.table <- fit %>% tidy %>%
|
|
mutate(p.value = scales::pvalue(p.value)) %>%
|
|
pivot_wider(names_from=server, values_from = c(estimate, std.error, statistic, p.value)) %>%
|
|
select(-c(.model)) %>%
|
|
select(term,
|
|
estimate_mastodon.online, p.value_mastodon.online,
|
|
estimate_mastodon.social, p.value_mastodon.social,
|
|
estimate_joinmastodon, p.value_joinmastodon,
|
|
estimate_other, p.value_other
|
|
) %>%
|
|
#select(term, starts_with("estimate"), starts_with("p.value")) #%>%
|
|
knitr::kable(
|
|
.,
|
|
format = format,
|
|
col.names = c("Term", "mastodon.online", "", "mastodon.social", "", "joinmastodon", "", "other", ""),
|
|
digits = 4,
|
|
align = c("l", "r", "r", "r", "r", "r", "r", "r", "r"),
|
|
booktabs = T
|
|
)
|
|
```
|
|
|
|
```{r}
|
|
#| label: prep-break-two-raw-counts
|
|
email.jm_servers <- as_tibble(fromJSON(here("data/joinmastodon-2023-08-25.json")))$domain
|
|
|
|
email.day_counts <- accounts %>%
|
|
filter(created_at > "2022-07-01") %>%
|
|
filter(created_at < "2022-10-26") %>%
|
|
mutate(created_day = as.Date(floor_date(created_at, unit = "day"))) %>%
|
|
mutate(server_code = ifelse(server %in% email.jm_servers, "joinmastodon", "other")) %>%
|
|
mutate(server = ifelse(server == "mastodon.social", "mastodon.social", server_code)) %>%
|
|
#mutate(server = server_code) %>%
|
|
#filter(server != "other") %>%
|
|
group_by(created_day, server) %>%
|
|
summarize(count = n(), .groups = "drop") %>%
|
|
as_tsibble(., key = server, index = created_day) %>%
|
|
fill_gaps(count = 0) %>%
|
|
mutate(open = ((created_day < "2022-08-13") |
|
|
(created_day > "2022-10-03")))
|
|
|
|
email.data_plot <- email.day_counts %>%
|
|
#filter(server != "other") %>%
|
|
mutate(created_week = as.Date(floor_date(created_day, unit = "week"))) %>%
|
|
ggplot(aes(x = created_day, y = count)) +
|
|
geom_rect(
|
|
data = (email.day_counts %>% filter(open)),
|
|
aes(
|
|
xmin = created_day - 0.5,
|
|
xmax = created_day + 0.5,
|
|
ymin = 0,
|
|
ymax = Inf
|
|
),
|
|
fill = "lightblue",
|
|
alpha = 0.3
|
|
) + # Adjust color and transparency as needed
|
|
geom_bar(stat = "identity") +
|
|
facet_wrap( ~ server, ncol = 1, strip.position = "left") + #, scales="free_y") +
|
|
scale_x_date(expand = c(0, 0), date_labels = "%B %Y") +
|
|
labs(
|
|
title = "Closure of mastodon.social (2022)",
|
|
x = "Account Created Date",
|
|
y = "Count"
|
|
) +
|
|
theme_bw_small_labels()
|
|
```
|
|
|
|
```{r}
|
|
#| label: email-open-coefs
|
|
|
|
if (knitr::is_latex_output()) {
|
|
format <- "latex"
|
|
} else {
|
|
format <- "html"
|
|
}
|
|
|
|
model_data <- email.day_counts %>%
|
|
mutate(count = log1p(count)) %>%
|
|
ungroup %>%
|
|
arrange(created_day) %>%
|
|
mutate(day = row_number())
|
|
|
|
fit <- model_data %>%
|
|
model(arima = ARIMA(count ~ open + day + open:day + fourier(period=7, K=2) + pdq(2,0,0) + PDQ(0,0,0,period=7)))
|
|
|
|
email.table <- fit %>% tidy %>%
|
|
mutate(p.value = scales::pvalue(p.value)) %>%
|
|
pivot_wider(names_from=server, values_from = c(estimate, std.error, statistic, p.value)) %>%
|
|
select(-c(.model)) %>%
|
|
select(term,
|
|
estimate_mastodon.social, p.value_mastodon.social,
|
|
estimate_joinmastodon, p.value_joinmastodon,
|
|
estimate_other, p.value_other
|
|
) %>%
|
|
knitr::kable(
|
|
.,
|
|
format = format,
|
|
col.names = c("Term", "mastodon.social", "", "joinmastodon", "", "other", ""),
|
|
digits = 4,
|
|
align = c("l", "r", "r", "r", "r", "r", "r"),
|
|
booktabs = T
|
|
)
|
|
```
|
|
|
|
|
|
```{r}
|
|
#| label: prep-break-three-raw-counts
|
|
late.jm_servers <- as_tibble(fromJSON(here("data/joinmastodon-2023-08-25.json")))$domain
|
|
|
|
last.day_counts <- accounts %>%
|
|
filter(created_at > "2022-12-01") %>%
|
|
filter(created_at < "2023-05-01") %>%
|
|
mutate(created_day = as.Date(floor_date(created_at, unit = "day"))) %>%
|
|
mutate(server_code = ifelse(server %in% late.jm_servers, "joinmastodon", "other")) %>%
|
|
mutate(server_code = ifelse(server == "mastodon.social", "mastodon.social", server_code)) %>%
|
|
mutate(server = server_code) %>%
|
|
#filter(server != "other") %>%
|
|
group_by(created_day, server) %>%
|
|
summarize(count = n(), .groups = "drop") %>%
|
|
as_tsibble(., key=server, index=created_day) %>%
|
|
fill_gaps(count=0) %>%
|
|
mutate(open = (created_day > "2023-02-08") | ((created_day > "2022-12-10") & (created_day < "2022-12-17")))
|
|
|
|
last.data_plot <- last.day_counts %>%
|
|
#filter(server != "other") %>%
|
|
mutate(created_week = as.Date(floor_date(created_day, unit = "week"))) %>%
|
|
ggplot(aes(x = created_day, y=count)) +
|
|
geom_rect(data = (last.day_counts %>% filter(open)),
|
|
aes(xmin = created_day - 0.5, xmax = created_day + 0.5, ymin = 0, ymax = Inf),
|
|
fill = "lightblue", alpha = 0.3) + # Adjust color and transparency as needed
|
|
geom_bar(stat="identity") +
|
|
facet_wrap(~ server, ncol=1, strip.position = "left") + #, scales="free_y") +
|
|
scale_x_date(expand = c(0, 0), date_labels = "%B %Y") +
|
|
#scale_y_log10() +
|
|
labs(
|
|
x = "Account Created Date",
|
|
y = "Count"
|
|
) +
|
|
theme_bw_small_labels()
|
|
|
|
#library(patchwork)
|
|
#early.data_plot + email.data_plot + last.data_plot + plot_layout(ncol = 1)
|
|
```
|
|
|
|
```{r}
|
|
#| label: late-open-coefs
|
|
|
|
if (knitr::is_latex_output()) {
|
|
format <- "latex"
|
|
} else {
|
|
format <- "html"
|
|
}
|
|
|
|
model_data <- last.day_counts %>%
|
|
mutate(count = log1p(count)) %>%
|
|
ungroup %>%
|
|
arrange(created_day) %>%
|
|
mutate(day = row_number())
|
|
|
|
fit <- model_data %>%
|
|
model(arima = ARIMA(count ~ open + day + open:day + fourier(period=7, K=2) + pdq(2,0,0) + PDQ(0,0,0,period=7)))
|
|
|
|
last.table <- fit %>% tidy %>%
|
|
mutate(p.value = scales::pvalue(p.value)) %>%
|
|
pivot_wider(names_from=server, values_from = c(estimate, std.error, statistic, p.value)) %>%
|
|
select(-c(.model)) %>%
|
|
select(term,
|
|
estimate_mastodon.social, p.value_mastodon.social,
|
|
estimate_joinmastodon, p.value_joinmastodon,
|
|
estimate_other, p.value_other
|
|
) %>%
|
|
knitr::kable(
|
|
.,
|
|
format = format,
|
|
col.names = c("Term", "mastodon.social", "", "joinmastodon", "", "other", ""),
|
|
digits = 4,
|
|
align = c("l", "r", "r", "r", "r", "r", "r"),
|
|
booktabs = T
|
|
)
|
|
```
|
|
|
|
|
|
```{r}
|
|
#| eval: false
|
|
library(sandwich)
|
|
model.poisson <- early.day_counts %>%
|
|
filter(server == "mastodon.online") %>%
|
|
filter(created_day > "2020-08-01") %>%
|
|
filter(created_day < "2021-09-01") %>%
|
|
ungroup %>%
|
|
arrange(created_day) %>%
|
|
mutate(day = row_number()) %>%
|
|
glm(count ~ day*open, data=., family=poisson)
|
|
|
|
v <- sqrt(diag(vcovHC(model.poisson, type = "HC0")))
|
|
coeftest(model.poisson, vcovHC(model.poisson, type="HC0"))
|
|
```
|
|
|
|
<!-- begin section that actually exports -->
|
|
|
|
::: {.panel-tabset}
|
|
|
|
#### Early
|
|
|
|
```{r}
|
|
#| label: fig-break-one-raw-counts
|
|
#| fig-height: 4
|
|
#| fig-width: 6.75
|
|
#| fig-env: figure*
|
|
#| fig-pos: p
|
|
early.data_plot
|
|
```
|
|
|
|
#### Email
|
|
|
|
```{r}
|
|
#| label: fig-break-two-raw-counts
|
|
#| fig-height: 3.5
|
|
#| fig-width: 6.75
|
|
#| fig-env: figure*
|
|
#| fig-pos: p
|
|
email.data_plot
|
|
```
|
|
|
|
#### Last
|
|
|
|
```{r}
|
|
#| label: fig-break-three-raw-counts
|
|
#| fig-height: 3.5
|
|
#| fig-width: 6.75
|
|
#| fig-env: figure*
|
|
#| fig-pos: p
|
|
last.data_plot
|
|
```
|
|
|
|
:::
|
|
|
|
::: {.panel-tabset}
|
|
|
|
#### Early
|
|
|
|
::: {#tbl-early .column-body}
|
|
|
|
```{r}
|
|
early.table
|
|
```
|
|
|
|
Caption
|
|
|
|
:::
|
|
|
|
#### Email
|
|
|
|
::: {#tbl-email .column-body}
|
|
|
|
```{r}
|
|
email.table
|
|
```
|
|
|
|
:::
|
|
|
|
#### Last
|
|
|
|
::: {#tbl-last .column-body}
|
|
|
|
```{r}
|
|
last.table
|
|
```
|
|
|
|
:::
|
|
|
|
:::
|
|
|
|
|
|
```{r}
|
|
#| label: fig-mastodon-online-forecast
|
|
#| fig-cap: "Historical signup counts for mastodon.online and two alternative forecasts based on whether or not mastoodn.social is accepting signups."
|
|
#| fig-height: 2.7
|
|
#| fig-width: 6.75
|
|
#| exec: false
|
|
#| fig-env: figure*
|
|
model_data <- early.day_counts %>%
|
|
mutate(count = log1p(count)) %>%
|
|
ungroup %>%
|
|
arrange(created_day) %>%
|
|
mutate(day = row_number())
|
|
|
|
fit <- model_data %>%
|
|
model(arima = ARIMA(count ~ open + day + open:day + fourier(period=7, K=2) + pdq(2,0,0) + PDQ(0,0,0,period=7)))
|
|
|
|
f_server <- "mastodon.online"
|
|
|
|
new_data <- tsibble(
|
|
created_day = max(model_data$created_day) + 1:100,
|
|
day = max(model_data$day) + 1:100,
|
|
server = f_server #""
|
|
)
|
|
|
|
model.obj <- fit %>%
|
|
filter(server == f_server) %>%
|
|
select(arima) %>% pull %>% first
|
|
|
|
forecast.open <- model.obj %>%
|
|
forecast(new_data=(new_data %>% add_column(open = TRUE))) %>%
|
|
hilo %>% unpack_hilo(`95%`)
|
|
|
|
forecast.closed <- model.obj %>%
|
|
forecast(new_data=(new_data %>% add_column(open = FALSE))) %>%
|
|
hilo %>% unpack_hilo(`95%`)
|
|
|
|
hist_data <- as_tibble(model_data) %>% filter(server == f_server) %>% select(created_day, server, count, open) %>% rename(count_mean=count)
|
|
|
|
bind_rows(
|
|
as_tibble(forecast.open),
|
|
as_tibble(forecast.closed)
|
|
) %>%
|
|
rename(count_mean=.mean) %>%
|
|
ggplot(aes(x=created_day, y=count_mean)) +
|
|
geom_line(aes(color=open, group=open)) + #, linetype="dashed") +
|
|
geom_ribbon(aes(ymin=`95%_lower`, ymax=`95%_upper`, group=open, fill=open), alpha=0.25) +
|
|
geom_line(aes(x=created_day, y=count_mean), data=hist_data) + # , color=open, group=open
|
|
geom_rect(data = (hist_data %>% filter(open)),
|
|
aes(xmin = created_day - 0.5, xmax = created_day + 0.5, ymin = 0, ymax = Inf),
|
|
fill = "lightblue", alpha = 0.3) + # Adjust color and transparency as needed
|
|
labs(
|
|
x = "Date",
|
|
y = "Accounts created (log1p)",
|
|
color = "Signups open on mastodon.social",
|
|
fill = "Signups open on mastodon.social"
|
|
) +
|
|
scale_x_date(expand = c(0, 0), date_labels = "%B %Y") +
|
|
theme_bw_small_labels() +
|
|
theme(legend.position="top", axis.title.x=element_blank())
|
|
``` |