Add viz for processing data

This commit is contained in:
Carl Colglazier 2024-02-05 17:37:01 -05:00
parent fc929dbfed
commit a0b1a424f5
6 changed files with 2604 additions and 90 deletions


@ -1,18 +1,21 @@
import polars as pl
import json
def get_fullweek_status_count(x: str):
def get_fullweek_status_count(x: str, attr):
try:
data = json.loads(x)
return int(data[1]["logins"])
return int(data[1][attr])
except:
return -1
def read_activity_file(f):
return pl.read_ipc("data/activity.feather").filter(
return pl.read_ipc(f).filter(
pl.col("data_string").str.starts_with('[{"week":')
).with_columns(
pl.col("data_string").map_elements(lambda x: get_fullweek_status_count(x)).alias('logins')
pl.col("data_string").map_elements(lambda x: get_fullweek_status_count(x, "week")).alias('week'),
pl.col("data_string").map_elements(lambda x: get_fullweek_status_count(x, "logins")).alias('logins'),
pl.col("data_string").map_elements(lambda x: get_fullweek_status_count(x, "statuses")).alias('statuses'),
pl.col("data_string").map_elements(lambda x: get_fullweek_status_count(x, "registrations")).alias('registrations'),
).sort("logins")
def read_metadata_file(f):
@ -34,7 +37,11 @@ def read_metadata_file(f):
)
def read_accounts_file(f):
df = pl.read_ipc(f).with_columns(
return pl.read_ipc(f).filter(
pl.col("data_string").str.contains('"pleroma":').not_()
#).filter(
#pl.col("data_string").str.starts_with('{"id"')
).with_columns(
pl.col("data_string").str.json_decode().alias("data")
).with_columns(
pl.col("data").struct.field("id"),
@ -58,10 +65,6 @@ def read_accounts_file(f):
pl.col("data").struct.field("statuses_count"),
pl.col("data").struct.field("last_status_at"),
pl.col("data").struct.field("noindex"),
pl.col("data").struct.field("emojis"),
pl.col("data").struct.field("roles"),
pl.col("data").struct.field("fields"),
pl.col("data").struct.field("suspended"),
).with_columns(
pl.when(
pl.col("last_status_at").str.len_chars() > 10).then(
@ -70,4 +73,3 @@ def read_accounts_file(f):
pl.col("last_status_at").str.strptime(pl.Datetime, format='%Y-%m-%d', strict=False).dt.replace_time_zone("UTC")
).alias("last_status_at")
)
return df
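For orientation, a minimal usage sketch of the two loaders touched above; the file paths are illustrative, and only columns visible in this diff are selected.

```python
# Usage sketch for the loaders above (illustrative paths; index.qmd calls
# read_accounts_file on its own Feather exports).
from code.load_accounts import read_activity_file, read_accounts_file

activity = read_activity_file("data/activity.feather")
print(activity.select(["week", "logins", "statuses", "registrations"]).head())

accounts = read_accounts_file("data/accounts.feather")
print(accounts.select(["id", "statuses_count", "last_status_at"]).head())
```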

378
index.qmd

@ -1,5 +1,5 @@
---
title: Onboarding The Fediverse (working title)
title: Best Practices for Onboarding on the Fediverse
short-title: Onboarding Fediverse
authors:
- name: Carl Colglazier
@ -27,6 +27,7 @@ acm-metadata:
#isbn: 978-1-4503-XXXX-X/18/06
format:
acm-pdf:
keep-tex: true
documentclass: acmart
classoption: [acmsmall,manuscript,screen,authorversion,nonacm,timestamp]
abstract: |
@ -48,6 +49,10 @@ library(network)
library(survival)
library(ggsurvfit)
library(modelsummary)
library(randomForestSRC)
library(grid)
library(scales)
options(arrow.skip_nul = TRUE)
```
@ -73,7 +78,9 @@ All online communities and accounts trend toward death.
# Empirical Setting
The Fediverse is a set of decentralized online social networks which interoperate using shared protocols like ActivityPub. Mastodon is a software program used by many Fediverse servers and offers a user experience similar to the Tweetdeck client for Twitter.
The Fediverse is a set of decentralized online social networks which interoperate using shared protocols like ActivityPub.
Mastodon is a software program used by many Fediverse servers and offers a user experience similar to the Tweetdeck client for Twitter. It was first created in late 2016.
Discovery has been challenging on Mastodon. The developers and user base tend to be skeptical of algorithmic intrusions, instead opting for timelines which only show posts in reverse chronological order. Search is also limited: public hashtags are searchable, but most servers have traditionally not supported searching keywords or free-text strings. Accounts can only be searched using their full `username@server` form.
@ -89,28 +96,48 @@ Mastodon offers its users high levels of data portability. Users can move their
#| output: false
from code.load_accounts import *
from urllib.parse import urlparse
accounts = read_accounts_file("data/accounts.feather")
#accounts = pl.concat(
# read_accounts_file("data/accounts.feather"),
# read_accounts_file("data/account_lookup_2023.feather")
#)
accounts = read_accounts_file(
"data/account_lookup_compressed.feather"
).unique(["account", "server"])
# Write a parsed accounts file for R to use
accounts.with_columns(
pl.col("data").struct.field("moved").is_not_null().alias("has_moved")
).drop(
["data", "data_string"]
a = accounts.with_columns(
pl.col("url").map_elements(
lambda x: urlparse(x).netloc.encode().decode('idna')
).alias("host"),
pl.col("data_string").str.contains("""\"moved\": \{""").alias("has_moved"),
pl.col("data").struct.field("suspended"),
)
a_save = a.drop(["data", "data_string"])
a_save.select(
sorted(a_save.columns)
).write_ipc("data/scratch/accounts.feather")
moved_accounts = accounts.with_columns(
moved_accounts = a.filter(pl.col("has_moved")).with_columns(# Do this again now we know the rows are all moved accounts
pl.col("data_string").str.json_decode().alias("data")
).with_columns(
pl.col("data").struct.field("moved")
).drop_nulls("moved").with_columns(
pl.col("moved").struct.field("acct").alias("moved_acct"),
).filter(
pl.col("moved_acct").str.contains('@')
).with_columns(
pl.col("moved_acct").str.split('@').list.get(1).alias("moved_server")
pl.when(
pl.col("moved_acct").str.contains('@')
).then(
pl.col("moved_acct").str.split('@').list.get(1)
).otherwise(
pl.col("server")
).alias("moved_server")
)
number_of_accounts = len(accounts)
number_of_accounts = len(a)
popular_servers = accounts.group_by("server").count().sort("count", descending=True)
popular_servers = a.group_by("server").count().sort("count", descending=True)
common_moves = moved_accounts.group_by(
["server", "moved_server"]
@ -134,57 +161,181 @@ read_metadata_file("data/metadata_2023-10-01.feather").drop(
).write_ipc("data/scratch/metadata.feather")
```
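The chunk above recovers account migrations by decoding the raw API JSON. Below is a toy illustration of that extraction; the record values are invented, but the nested `moved.acct` field follows the Mastodon API's Account entity, matching what the code parses.

```python
# Toy example of the moved-account extraction performed in the chunk above.
# The JSON values are invented for illustration.
import polars as pl

example = pl.DataFrame({
    "server": ["mastodon.social"],
    "data_string": ['{"id": "1", "acct": "alice", "moved": {"acct": "alice@hachyderm.io"}}'],
})

moved = (
    example
    .with_columns(pl.col("data_string").str.json_decode().alias("data"))
    .with_columns(
        pl.col("data").struct.field("moved").struct.field("acct").alias("moved_acct")
    )
    .with_columns(
        pl.col("moved_acct").str.split("@").list.get(1).alias("moved_server")
    )
)
print(moved.select(["server", "moved_acct", "moved_server"]))
```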
```{r}
#| label: r-load-accounts
```{python}
#| label: py-preprocess-data2
#| cache: true
#| output: false
accounts <- arrow::read_feather("data/scratch/accounts.feather", col_select=c("server", "username", "created_at", "last_status_at", "statuses_count", "has_moved", "bot")) %>%
filter(!has_moved) %>%
from code.load_accounts import read_accounts_file
from urllib.parse import urlparse
import polars as pl
profile_accounts = read_accounts_file("data/profiles_local.feather")
p = profile_accounts.with_columns(
pl.col("url").map_elements(lambda x: urlparse(x).netloc.encode().decode('idna')).alias("host"),
pl.col("username").alias("account"),
pl.lit(False).alias("has_moved"),
pl.lit(False).alias("suspended")
).drop(
["data", "data_string"]
)
p.select(sorted(p.columns)).write_ipc("data/scratch/accounts_processed_profiles.feather")
all_accounts = pl.scan_ipc(
[
"data/scratch/accounts.feather",
#"data/scratch/accounts_processed_recent.feather",
"data/scratch/accounts_processed_profiles.feather"
]).collect()
all_accounts.filter(pl.col("host").eq(pl.col("server"))).unique(["account", "server"]).write_ipc("data/scratch/all_accounts.feather")
```
```{r}
#| eval: false
arrow::read_feather(
"data/scratch/accounts.feather",
col_select = c(
"server", "username", "created_at",
"last_status_at", "statuses_count",
"has_moved", "bot", "suspended"
)) %>%
mutate(suspended = replace_na(suspended, FALSE)) %>%
filter(!bot) %>%
# TODO: what's going on here?
filter(!is.na(last_status_at)) %>%
# sanity check
filter(created_at >= "2022-01-01") %>%
filter(created_at < "2023-03-01") %>%
# We don't want accounts that were created and then immediately stopped being active
filter(statuses_count >= 5) %>%
filter(last_status_at >= created_at) %>%
mutate(active = last_status_at >= "2023-09-01") %>%
# set max last_status_at to 2023-06-01
mutate(last_status_at = ifelse(active, lubridate::ymd_hms("2023-09-01 00:00:00", tz = "UTC"), last_status_at)) %>%
mutate(active_time = difftime(last_status_at, created_at, units="secs"))
filter(created_at < "2024-03-01") %>%
# We don't want accounts that were created
# and then immediately stopped being active
filter(statuses_count > 1) %>%
filter(!suspended) %>%
filter(!has_moved) %>%
#filter(last_status_at >= created_at) %>%
mutate(created_month = format(created_at, "%Y-%m")) %>%
group_by(created_month) %>%
summarize(count=n()) %>%
distinct(created_month, count) %>%
ggplot(aes(x=created_month, y=count)) +
geom_bar(stat="identity", fill="black") +
labs(y="Count", x="Created Month") +
theme_bw() + theme(axis.text.x = element_text(angle = 90, hjust = 1))
```
**Mastodon Profiles**: We drew accounts from data previously collected from posts on public Mastodon timelines between October 2020 and February 2023. We then queried for up-to-date information on those accounts, including their most recent status and whether the account had moved. This gave us a total of `r nrow(accounts)` accounts.
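A hedged sketch of the per-account refresh this paragraph describes, using the public Mastodon REST endpoint for account lookup; the actual crawler is not part of this commit, and the handle below is only an example.

```python
# Sketch: fetch the current public record for one account handle via
# GET /api/v1/accounts/lookup. Handle and server are examples only.
import requests

def lookup_account(server: str, acct: str) -> dict:
    resp = requests.get(
        f"https://{server}/api/v1/accounts/lookup",
        params={"acct": acct},
        timeout=30,
    )
    resp.raise_for_status()
    return resp.json()

record = lookup_account("mastodon.social", "Mastodon")
# Fields used downstream include last_status_at, statuses_count, and the
# optional `moved` object present on migrated accounts.
print(record.get("last_status_at"), "moved" in record)
```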
```{r}
adv_server_counts <- arrow::read_feather("data/scratch/accounts.feather", col_select=c("server", "username", "created_at", "bot")) %>%
#| label: fig-account-timeline
#| fig-cap: "Accounts in the dataset created between January 2022 and March 2023. The top panel shows the proportion of accounts still active 45 days after creation, the proportion of accounts that have moved, and the proportion of accounts that have been suspended. The bottom panel shows the count of accounts created each week. The dashed vertical lines in the bottom panel mark the announcement of the Elon Musk Twitter acquisition, the day the acquisition closed, and the day Twitter experienced an outage and began rate limiting accounts."
#| fig-height: 3
#| fig-width: 6.75
accounts_unfilt <- arrow::read_feather("data/scratch/all_accounts.feather", col_select=c("server", "username", "created_at", "last_status_at", "statuses_count", "has_moved", "bot", "suspended", "host")) %>%
filter(server == host)
accounts <- accounts_unfilt
filter(!bot) %>%
filter(created_at > "2017-01-01") %>%
filter(created_at <= "2023-01-01") %>%
group_by(server) %>%
arrange(created_at) %>%
mutate(r = row_number()) %>%
arrange(desc(r)) %>%
distinct(server, created_at, .keep_all=TRUE) %>%
select(server, created_at, r) %>%
ungroup() %>%
mutate(server_date = as.Date(created_at))
# TODO: what's going on here?
filter(!is.na(last_status_at)) %>%
mutate(suspended = replace_na(suspended, FALSE)) %>%
# sanity check
filter(created_at >= "2022-01-01") %>%
filter(created_at < "2023-08-01") %>%
# We don't want accounts that were created and then immediately stopped being active
filter(statuses_count >= 1) %>%
filter(last_status_at >= created_at) %>%
mutate(active = last_status_at >= "2024-01-01") %>%
mutate(last_status_at = ifelse(active, lubridate::ymd_hms("2024-01-01 00:00:00", tz = "UTC"), last_status_at)) %>%
mutate(active_time = difftime(last_status_at, created_at, units="days")) %>%
filter(!has_moved)
acc_data <- accounts_unfilt %>%
mutate(created_month = format(created_at, "%Y-%m")) %>%
mutate(created_week = floor_date(created_at, unit = "week")) %>%
mutate(active = active_time >= 45) %>%
group_by(created_week) %>%
summarize(
Suspended = sum(suspended)/n(),
Active = (sum(active)-sum(has_moved)-sum(suspended))/(n()-sum(has_moved)-sum(suspended)),
Moved=sum(has_moved)/n(),
count=n()) %>%
pivot_longer(cols=c("Active", "Moved", "Suspended"), names_to="Measure", values_to="value")
theme_bw_small_labels <- function(base_size = 9) {
theme_bw(base_size = base_size) %+replace%
theme(
plot.title = element_text(size = base_size * 0.8),
plot.subtitle = element_text(size = base_size * 0.75),
plot.caption = element_text(size = base_size * 0.7),
axis.title = element_text(size = base_size * 0.9),
axis.text = element_text(size = base_size * 0.8),
legend.title = element_text(size = base_size * 0.9),
legend.text = element_text(size = base_size * 0.8)
)
}
p1 <- acc_data %>%
ggplot(aes(x=as.Date(created_week), group=1)) +
geom_line(aes(y=value, group=Measure, color=Measure)) +
geom_point(aes(y=value, color=Measure)) +
scale_y_continuous(limits = c(0, 1.0)) +
labs(y="Proportion") + scale_x_date(labels=date_format("%Y-%U"), breaks = "4 week") +
theme_bw_small_labels() +
theme(axis.title.x = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank())
p2 <- acc_data %>%
distinct(created_week, count) %>%
ggplot(aes(x=as.Date(created_week), y=count)) +
geom_bar(stat="identity", fill="black") +
geom_vline(
aes(xintercept = as.numeric(as.Date("2022-10-27"))),
linetype="dashed", color = "black") +
#geom_text(
# aes(x=as.Date("2022-10-27"),
# y=max(count),
# label=" Elon Musk Twitter Acquisition Completed"),
# vjust=-1, hjust=0, color="black") +
geom_vline(
aes(xintercept = as.numeric(as.Date("2022-04-14"))),
linetype="dashed", color = "black") +
# https://twitter.com/elonmusk/status/1675187969420828672
geom_vline(
aes(xintercept = as.numeric(as.Date("2023-07-01"))),
linetype="dashed", color = "black") +
#scale_y_continuous(limits = c(0, max(acc_data$count) + 100000)) +
labs(y="Count", x="Created Week") +
theme_bw_small_labels() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + scale_x_date(labels=date_format("%Y-%U"), breaks = "4 week")
#grid.draw(rbind(ggplotGrob(p1), ggplotGrob(p2), size = "last"))
library(patchwork)
p1 + p2 + plot_layout(ncol = 1)
```
```{r}
adv_server_counts %>% filter(server == "mastodon.social") %>%
ggplot(aes(x=server_date, y=r)) +
geom_line() + theme_minimal()
```
**Mastodon Profiles (2022)**: We drew accounts from data previously collected from posts on public Mastodon timelines between October 2020 and January 2024. We then queried for up-to-date information on those accounts, including their most recent status and whether the account had moved. This gave us a total of `r nrow(accounts)` accounts, of which `r accounts %>% `. We selected accounts created after January 1, 2022 and before August 1, 2023 which posted at least one status.
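For clarity, the same selection window expressed as a polars filter; this is a sketch assuming `created_at` is stored as a UTC datetime in the scratch file read by the surrounding chunks.

```python
# Sketch of the selection window described above, assuming created_at is a
# UTC datetime column in data/scratch/all_accounts.feather.
from datetime import datetime, timezone
import polars as pl

selected = (
    pl.scan_ipc("data/scratch/all_accounts.feather")
    .filter(pl.col("created_at") >= datetime(2022, 1, 1, tzinfo=timezone.utc))
    .filter(pl.col("created_at") < datetime(2023, 8, 1, tzinfo=timezone.utc))
    .filter(pl.col("statuses_count") >= 1)  # posted at least one status
    .collect()
)
print(selected.height)
```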
```{r}
#| label: fig-account-activity-prop
#| fig-cap: "Account Activity Over Time"
#| fig-height: 4
#| eval: false
study_period <- 45
#formerly accounts_processed_recent
accounts_unfilt <- arrow::read_feather("data/scratch/all_accounts.feather", col_select=c("server", "host", "username", "created_at", "last_status_at", "statuses_count", "has_moved", "bot", "uri", "suspended")) %>%
filter(server == host) %>%
filter(bot != TRUE) %>%
mutate(suspended = replace_na(suspended, FALSE)) %>%
filter(suspended != TRUE) %>%
# TODO: what's going on here?
filter(!is.na(last_status_at)) %>%
# sanity check
filter(created_at >= "2023-10-15") %>% filter(created_at < "2024-01-01") %>%
# We don't want accounts that were created and then immediately stopped being active
filter(statuses_count >= 1) %>%
filter(last_status_at >= created_at) %>%
mutate(active_time = difftime(last_status_at, created_at, units="days")) %>%
select(server, username, created_at, active_time, last_status_at, has_moved) %>%
mutate(active = active_time >= study_period) %>%
mutate(active_time = ifelse(active_time > study_period, study_period, active_time))
server_counts <- arrow::read_feather("data/scratch/accounts.feather", col_select=c("server", "username", "created_at", "bot")) %>%
filter(created_at <= "2023-01-01") %>%
accounts <- accounts_unfilt %>% filter(!has_moved)
server_counts <- arrow::read_feather(
"data/scratch/accounts.feather",
col_select=c("server", "username", "created_at", "bot")
) %>%
filter(created_at <= "2023-03-01") %>%
filter(!bot) %>%
group_by(server) %>%
summarize(server_count = n()) %>%
arrange(desc(server_count)) %>%
@ -195,14 +346,29 @@ metadata <- arrow::read_feather("data/scratch/metadata.feather", col_select=c("s
mutate(server_count = user_count) %>%
mutate(server_count_bin = floor(log10(server_count)))
activity <- arrow::read_feather(
"data/scratch/activity.feather",
col_select = c("server", "logins")
) %>%
arrange(desc(logins)) %>%
mutate(server_count = logins) %>%
mutate(server_count_bin = floor(log10(server_count))) %>%
# Merge 4 and 5
mutate(server_count_bin = ifelse(server_count_bin >= 5, 4, server_count_bin))# %>%
# Merge 2 and 3
#mutate(server_count_bin = ifelse(server_count_bin == 3, 2, server_count_bin))
jm <- arrow::read_feather("data/scratch/joinmastodon.feather")
a <- accounts %>%
inner_join(metadata, by="server") %>%
mutate(metadata = server_count > 500) %>%
inner_join(activity, by="server") %>%
mutate(large_server = server_count > 1000) %>%
mutate(active_time = as.integer(active_time)) %>%
mutate(active_time_weeks = active_time / 60 / 60 / 24 / 7) %>%
mutate(status = ifelse(active, 0, 1)) %>% mutate(jm = server %in% jm$domain)# %>% filter(jm)
mutate(active_time_weeks = active_time) %>%
mutate(status = ifelse(active, 0, 1)) %>%
mutate(jm = server %in% jm$domain) %>%
filter(server_count > 0)
survfit2(Surv(active_time_weeks, status) ~ server_count_bin, data = a) %>%
ggsurvfit() +
@ -210,27 +376,42 @@ survfit2(Surv(active_time_weeks, status) ~ server_count_bin, data = a) %>%
scale_y_continuous(limits = c(0, 1)) +
labs(
y = "Overall survival probability",
x = "Time (weeks)",
colour = "Server Size (log10)",
fill = "Server Size (log10)",
x = "Time (days)",
) +
add_risktable() +
scale_x_continuous(
breaks = seq(0, max(a$active_time_weeks, na.rm = TRUE), by = 52),
labels = seq(0, max(a$active_time_weeks, na.rm = TRUE), by = 52)
breaks = seq(0, max(a$active_time_weeks, na.rm = TRUE), by = 4),
labels = seq(0, max(a$active_time_weeks, na.rm = TRUE), by = 4)
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
To determine the relationship between server size and user retention, we fit a Cox proportional hazards model on account survival time, with log-scaled server size and JoinMastodon listing as covariates.
```{r}
#| eval: false
sel_a <- a %>%
#filter(created_at >= "2022-06-27") %>%
#filter(created_at < "2022-08-26") %>% #%>% mutate(jm = as.integer(jm))
mutate(is_ms = server == "mastodon.social")
cx <- coxph(Surv(active_time_weeks, status) ~ log10(server_count) + jm, data = sel_a)
cz <- cox.zph(cx)
#plot(cz)
cx
```
```{r}
#| eval: false
obj <- rfsrc(Surv(active_time_weeks, status) ~ server_count_bin + jm, data = (a %>% sample_n(1000)), ntree=5000)
plot(get.tree(obj, 0))
```
## Moved Accounts
```{r}
#| label: fig-moved-accounts
#| fig-height: 4
#| eval: false
moved_accounts <- arrow::read_feather("data/scratch/moved_accounts.feather")
popular_servers <- arrow::read_feather("data/scratch/popular_servers.feather")
server_movement_data <- left_join(
@ -250,31 +431,63 @@ server_movement_data %>%
If there was no relationship, we would expect these jumps to be random with respect to server size.
```{r}
popular_servers <- arrow::read_feather("data/scratch/popular_servers.feather")
moved_accounts <- arrow::read_feather("data/scratch/moved_accounts.feather")
activity <- arrow::read_feather("data/scratch/activity.feather", col_select=c("server", "logins")) %>% arrange(desc(logins))
popular_and_large_servers <- popular_servers %>% filter(count >= 1) %>%
popular_servers <-
arrow::read_feather("data/scratch/popular_servers.feather")
moved_accounts <-
arrow::read_feather("data/scratch/moved_accounts.feather") %>%
# Remove loops
filter(server != moved_server)
activity <-
arrow::read_feather("data/scratch/activity.feather",
col_select = c("server", "logins")) %>%
arrange(desc(logins))
popular_and_large_servers <-
popular_servers %>% filter(count >= 1) %>%
mutate(count = log10(count))
jm <- arrow::read_feather("data/scratch/joinmastodon.feather")
ma <- moved_accounts %>% filter(server %in% popular_and_large_servers$server) %>% filter(moved_server %in% popular_and_large_servers$server)
edgeNet<-network(ma,matrix.type="edgelist")
edgeNet%v%"user_count" <- left_join((as_tibble(edgeNet%v%'vertex.names') %>% rename(server=value)), popular_and_large_servers, by="server") %>% select(count) %>% unlist()
edgeNet%v%"in_jm" <- as_tibble(edgeNet%v%'vertex.names') %>% mutate(in_jm = value %in% jm$domain) %>% select(in_jm) %>% unlist()
ma <- moved_accounts %>%
filter(server %in% popular_and_large_servers$server) %>%
filter(moved_server %in% popular_and_large_servers$server)
# Construct network
edgeNet <- network(ma, matrix.type = "edgelist")
edgeNet %v% "user_count" <-
left_join((as_tibble(edgeNet %v% 'vertex.names') %>% rename(server = value)),
popular_and_large_servers,
by = "server") %>%
select(count) %>%
unlist()
edgeNet %v% "in_jm" <-
as_tibble(edgeNet %v% 'vertex.names') %>%
mutate(in_jm = value %in% jm$domain) %>%
select(in_jm) %>% unlist()
```
We construct an exponential family random graph model (ERGM) where nodes represent servers and weighted directed edges represent the number of accounts that moved between servers.
$$
\begin{aligned}
\text{Sum}_{i,j} = \ & \beta_0 + \beta_1 \left(\log_{10}(\text{user count}_j) - \log_{10}(\text{user count}_i)\right) + \\
& \beta_2 \left(\log_{10}(\text{user count}_i) + \log_{10}(\text{user count}_j)\right) + \\
& \beta_3 \, \mathbf{1}\left[i \text{ and } j \text{ both listed on JoinMastodon}\right] + \\
& \beta_4 \, \mathbf{1}\left[i \text{ and } j \text{ both not listed on JoinMastodon}\right]
\end{aligned}
$$
```{r}
#| label: ergm-model
#| cache: true
m1 <- ergm(edgeNet ~ sum + diff("user_count", pow=1, form="sum") + nodecov("user_count", form="sum") + nodematch("in_jm", diff=TRUE, form="sum"), response="count", reference=~Binomial(3))
m1 <-
ergm(
edgeNet ~ sum +
diff("user_count", pow = 1, form = "sum") +
nodecov("user_count", form = "sum") +
nodematch("in_jm", diff = TRUE, form = "sum"),
response = "count",
reference = ~ Binomial(3),
control=control.ergm(parallel=4, parallel.type="PSOCK")
)
save(m1, file="data/scratch/ergm-model.rda")
save(m1, file = "data/scratch/ergm-model.rda")
```
@ -285,16 +498,19 @@ ergm_model <- load("data/scratch/ergm-model.rda")
modelsummary(
m1,
escape = FALSE,
coef_rename = c(
"sum" = "Intercept",
"diff.sum.t-h.user_count " = "User Count Difference",
"nodecov.sum.user_count " = "User Count (Node Covariate)",
"nodematch.sum.in_jm.TRUE" = "In JoinMastodon (Both True)",
"nodematch.sum.in_jm.FALSE" = "In JoinMastodon (Both False)"
"sum" = "\\beta_0 Intercept",
"diff.sum.t-h.user_count" = "\\beta_1 User Count Difference",
"nodecov.sum.user_count" = "\\beta_2 User Count (Node Covariate)",
"nodematch.sum.in_jm.TRUE" = "\\beta_3 In JoinMastodon (Both True)",
"nodematch.sum.in_jm.FALSE" = "\\beta_4 In JoinMastodon (Both False)"
),
)
```
We find a strong preference for accounts to move from large servers to smaller servers.
```{python}
#| eval: false
#| include: false
@ -316,4 +532,6 @@ sim_counts = simulations.join(popular_servers, how="inner", on="server").rename(
## Tag Clusters
We found _number_ posts which contained between two and five tags.
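As a sketch of how this count could be computed, assuming a hypothetical posts table with a `tags` list column (the posts schema is not part of this commit):

```python
# Hypothetical posts table; count posts carrying between two and five tags.
import polars as pl

posts = pl.DataFrame({
    "id": [1, 2, 3],
    "tags": [["fediverse"], ["mastodon", "twittermigration"], []],
})
multi_tag = posts.filter(pl.col("tags").list.len().is_between(2, 5))
print(multi_tag.height)
```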
# References {#references}

13
junior-sheer.Rproj Normal file

@ -0,0 +1,13 @@
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: knitr
LaTeX: pdfLaTeX


@ -1,3 +1,19 @@
@article{cavaDriversSocialInfluence2023,
title = {Drivers of Social Influence in the {{Twitter}} Migration to {{Mastodon}}},
author = {Cava, Lucio La and Aiello, Luca Maria and Tagarelli, Andrea},
year = {2023},
month = dec,
journal = {Scientific Reports},
volume = {13},
number = {1},
pages = {21626},
issn = {2045-2322},
doi = {10.1038/s41598-023-48200-7},
urldate = {2024-02-02},
abstract = {The migration of Twitter users to Mastodon following Elon Musk's acquisition presents a unique opportunity to study collective behavior and gain insights into the drivers of coordinated behavior in online media. We analyzed the social network and the public conversations of about 75,000 migrated users and observed that the temporal trace of their migrations is compatible with a phenomenon of social influence, as described by a compartmental epidemic model of information diffusion. Drawing from prior research on behavioral change, we delved into the factors that account for variations of the effectiveness of the influence process across different Twitter communities. Communities in which the influence process unfolded more rapidly exhibit lower density of social connections, higher levels of signaled commitment to migrating, and more emphasis on shared identity and exchange of factual knowledge in the community discussion. These factors account collectively for 57\% of the variance in the observed data. Our results highlight the joint importance of network structure, commitment, and psycho-linguistic aspects of social interactions in characterizing grassroots collective action, and contribute to deepen our understanding of the mechanisms that drive processes of behavior change of online groups.},
langid = {english}
}
@article{fieslerMovingLandsOnline2020,
title = {Moving across Lands: Online Platform Migration in Fandom Communities},
shorttitle = {Moving across Lands},

2233
renv.lock

File diff suppressed because it is too large

32
requirements.txt Normal file

@ -0,0 +1,32 @@
appnope==0.1.3
asttokens==2.4.1
comm==0.2.1
debugpy==1.8.0
decorator==5.1.1
executing==2.0.1
ipykernel==6.29.0
ipython==8.20.0
jedi==0.19.1
jupyter_client==8.6.0
jupyter_core==5.7.1
matplotlib-inline==0.1.6
nest-asyncio==1.6.0
packaging==23.2
parso==0.8.3
pexpect==4.9.0
platformdirs==4.1.0
polars==0.20.6
prompt-toolkit==3.0.43
psutil==5.9.8
ptyprocess==0.7.0
pure-eval==0.2.2
Pygments==2.17.2
python-dateutil==2.8.2
pyzmq==25.1.2
setuptools==69.0.3
six==1.16.0
stack-data==0.6.3
tornado==6.4
traitlets==5.14.1
wcwidth==0.2.13
wheel==0.42.0