diff --git a/images/Mastodon_Single-column-layout.png b/images/Mastodon_Single-column-layout.png new file mode 100644 index 0000000..f797b8e Binary files /dev/null and b/images/Mastodon_Single-column-layout.png differ diff --git a/images/mastodon_map.png b/images/mastodon_map.png new file mode 100644 index 0000000..38e7dcb Binary files /dev/null and b/images/mastodon_map.png differ diff --git a/presentations/images b/presentations/images new file mode 120000 index 0000000..5e67573 --- /dev/null +++ b/presentations/images @@ -0,0 +1 @@ +../images \ No newline at end of file diff --git a/presentations/lightning_talk.qmd b/presentations/lightning_talk.qmd new file mode 100644 index 0000000..ec2b149 --- /dev/null +++ b/presentations/lightning_talk.qmd @@ -0,0 +1,260 @@ +--- +title: "Do Servers Matter on Mastodon?" +subtitle: "Data-driven Design for Decentralized Social Media" +author: "Carl Colglazier" +institute: + - "Community Data Science Collective" + - "Northwestern University" +date: "2024-03-08" +title-slide-attributes: + data-background: "#4c3854" +format: + revealjs: + width: 1600 + height: 900 + date-format: long + margin: 0.2 + center-title-slide: false + #disable-layout: true + theme: [default, presentation.scss] + slide-number: false + keep-md: true + pdf-max-pages-per-slide: 1 + template-partials: + - title-slide.html +# beamer: +# aspectratio: 169 +# theme: metropolis +# colortheme: seahorse +knitr: + opts_chunk: + dev: "ragg_png" + retina: 1 + dpi: 300 +execute: + freeze: auto + cache: true + echo: false +# fig-width: 5 +# fig-height: 6 +prefer-html: true +--- + +## Empirical Context + +::: {.columns} + +:::: {.column} + +- **The Fediverse**: A set of decentralized online social networks which interoperate using shared protocols like ActivityPub. + +- **Mastodon**: An open-source, decentralized social network and microblogging community. 
+ +:::: + +:::: {.column} + +![A screenshot of Mastodon 2.9 (2019), from the Mastodon Blog.](images/Mastodon_Single-column-layout.png) + +:::: + +::: + +# The Fediverse is a network of _thousands_ of interconnected servers {background-color="black" data-background-image="images/mastodon_map.png" background-repeat="repeat" background-size="200px" background-opacity="0.5" .center auto-animate=true .fade-out} + +## Mastodon grew significantly in 2022 and 2023 + +```{r} +#| label: fig-account-timeline +#| fig-width: 5 +#| fig-height: 2.5 +library(here) +source(here("code/helpers.R")) +account_timeline_plot() +``` + +## Which server should I join? + +### Conflicting advice + +::: {.columns} +::: {.column} + +Just join any server! + +::: +::: {.column} + +Join the _right_ server! + +::: +::: + +::: {.fragment} +### Which is right? {.center-xy} +::: + +--- + +![](images/joinmastodon-screenshot.png){.center} + +# Does server choice matter? {.center} + +## Survival model for new accounts + +Which new accounts are more likely to stay active after 91 days? + +::: {.columns} + +::: {.column} + +```{r, cache.extra = tools::md5sum("code/survival.R")} +#| cache: true +#| label: fig-survival +#| fig-env: figure +#| fig-cap: "Survival probabilities for accounts created during May 2023." +#| fig-width: 3.375 +#| fig-height: 2.25 +#| fig-pos: h! 
+ +library(here) +source(here("code/survival.R")) +plot_km +``` + +::: + +::: {.column .small} + +```{r} +#| label: tbl-coxme +library(ehahelper) +library(broom) + +cxme_table <- tidy(cxme) %>% # cxme is presumably created by code/survival.R, sourced in the previous chunk; verify + mutate(conf.low = exp(conf.low), conf.high = exp(conf.high)) %>% # exponentiate both interval bounds + mutate(term = case_when( + term == "factor(group)1" ~ "Join Mastodon", + term == "factor(group)2" ~ "General Servers", + term == "small_serverTRUE" ~ "Small Server", + TRUE ~ term + )) %>% + mutate(exp.coef = paste0("(", round(conf.low, 2), ", ", round(conf.high, 2), ")")) %>% # interval rendered as "(low, high)" text + select(term, estimate, exp.coef, p.value) # NOTE(review): estimate is not exponentiated, unlike the interval; confirm this is intended + +cxme_table %>% knitr::kable(digits = 3) +``` + +::: + +::: + +## Accounts that move + +Do they move to larger servers or to smaller servers? + +::: {.small} + +```{r} +#| label: tbl-ergm-table +#| echo: false +#| warning: false +#| message: false +#| error: false + +library(here) +library(modelsummary) +library(kableExtra) +library(purrr) +library(stringr) +load(file = here("data/scratch/ergm-model-early.rda")) +load(file = here("data/scratch/ergm-model-late.rda")) + +if (knitr::is_latex_output()) { + format <- "latex_tabular" +} else { + format <- "html" +} + +x <- modelsummary( + list("Coef." = model.early, "Std.Error" = model.early, "Coef." 
= model.late, "Std.Error" = model.late), + estimate = c("{estimate}", "{stars}{std.error}", "{estimate}", "{stars}{std.error}"), + statistic = NULL, + gof_omit = ".*", + coef_rename = c( + "sum" = "Sum", + "nonzero" = "Nonzero", + "diff.sum0.h-t.accounts" = "Smaller server", + "nodeocov.sum.accounts" = "Server size\n(outgoing)", + "nodeifactor.sum.registrations.TRUE" = "Open registrations\n(incoming)", + "nodematch.sum.language" = "Languages match" + ), + align="lrrrr", + stars = c('*' = .05, '**' = 0.01, '***' = .001), + output = format + ) %>% add_header_above(c(" " = 1, "Model A" = 2, "Model B" = 2)) + +x +``` + +::: + +# Our analysis suggests {.center} + +- Accounts on large, general servers fare worse +- Moved accounts go to smaller servers + +Can we build a system that helps people find servers? + +# Recommendation System Concept + +- Report top **hashtags** used by the most accounts on each server +- Build an $M \times N$ server-tag matrix +- Normalize with Okapi BM25 TF-IDF and L2 normalization + + +::: {.fragment} +Using this matrix, we can + +- Calculate similarity between servers using tags +- Calculate similarity between tags using servers +- Recommend servers based on affinity toward certain tags +::: + +## Example: Server Similarity + +::: {#tbl-sim-servers} + +```{r} +#| label: table-sim-servers +library(tidyverse) +library(arrow) +library(here) + +sim_servers <- here("data/scratch/server_similarity.feather") %>% arrow::read_ipc_file() +server_of_interest <- "hci.social" +server_table <- sim_servers %>% + arrange(desc(Similarity)) %>% + filter(Source == server_of_interest | Target == server_of_interest) %>% + head(7) %>% + pivot_longer(cols=c(Source, Target)) %>% + filter(value != server_of_interest) %>% + select(value, Similarity) %>% + rename("Server" = "value") + +if (knitr::is_latex_output()) { + server_table %>% knitr::kable(format="latex", booktabs=TRUE, digits=3) +} else { + server_table %>% knitr::kable(digits = 3) +} +``` + +Top seven servers 
most similar to hci.social + +::: + +# Future Work + +- Evaluation of the recommendation system +- More specific analysis of account attributes +- Simulations for robustness diff --git a/presentations/presentation.scss b/presentations/presentation.scss new file mode 100644 index 0000000..052701d --- /dev/null +++ b/presentations/presentation.scss @@ -0,0 +1,140 @@ +/*-- scss:defaults --*/ + +@import url(https://fonts.googleapis.com/css?family=Montserrat:300,300i&display=swap); +@import url(https://fonts.googleapis.com/css?family=Josefin+Sans&display=swap); +@import url(https://fonts.googleapis.com/css?family=Fira+Mono&display=swap); + +// fonts +$font-family-sans-serif: Montserrat, sans-serif !default; +$font-family-monospace: "Fira Mono", monospace !default; + +// colors +$body-bg: #fff !default; +$body-color: #272822 !default; +$link-color: #055099 !default; + +// headings +$presentation-heading-font: "Josefin Sans", sans-serif !default; +$presentation-heading-color: #3d2d43 !default; + +$h1-font-size: 1.6em !default; +$h2-font-size: 1.3em !default; +$h3-font-size: 1.15em !default; +$h4-font-size: 1em !default; + +// code blocks +$code-block-border-color: #a592ac !default; +$code-color: $presentation-heading-color; + +// +// Color system +// + +// stylelint-disable +$white: #fff !default; +$gray-100: #f1edf3 !default; +$gray-200: #e6dee9 !default; +$gray-300: #d4c6d9 !default; +$gray-400: #b69ebe !default; +$gray-500: #9f80aa !default; +$gray-600: #8b6699 !default; +$gray-700: #6e5078 !default; +$gray-800: #63486c !default; +$gray-900: #4c3854 !default; +$black: #000 !default; + +/*-- scss:rules --*/ + +#title-slide { + text-align: left; + display: flex!important; + flex-direction: column; + height: 100%; + justify-content: space-around; +} + +.title{ + color: $white; + font-size: 2.5em; + font-family: $presentation-heading-font; +} + +.subtitle{ + color: $gray-100; + font-size: $h2-font-size; + font-family: $presentation-heading-font; +} + + +.author { + color: 
$white; + font-size: $h3-font-size; + font-family: $presentation-heading-font; +} + +.date { + color: $white; +} + +.message{ + color: $presentation-heading-color; + font-size: $h2-font-size; + font-family: $presentation-heading-font; + text-align: center; +} + +.space-left { + margin-left: 5%; +} + +.center-xy { + margin: 0; + position: absolute; + top: 50%; + left: 50%; + -ms-transform: translate(-50%, -50%); /* legacy prefixed form of the declaration below */ + transform: translate(-50%, -50%); /* pull the box back by half its own size; transform functions are space-separated, so the former comma-separated list was an invalid declaration and was ignored */ +} + +.small { + font-size: 0.8em; +} + +.tiny { + font-size: 0.4em; +} + +.Large { + font-size: 1.6em; +} + +section.has-dark-background a { + color: $white; + border-style: dashed; + border-bottom-width: 1px; +} + +section.has-dark-background a:hover { + color: $white; + border-bottom: solid; +} + +.reveal h2 { + padding-top: 20rem; + padding-bottom: 2rem; + padding-left: 20rem; + padding-right: 20rem; + background-color: $gray-800; + color: $white; + position: relative; + //top: -38rem; + margin-top: -22rem; + margin-bottom: 2rem; + left: -20rem; + width: 100%; +} + +/* +.slide { + padding: 2rem; +}*/ \ No newline at end of file