143 lines
3.4 KiB
Plaintext
143 lines
3.4 KiB
Plaintext
---
title: "syphilis traffic"
output: html_document
date: "2024-11-19"
---
|
|
|
|
```{r}
# Attach the plotting (ggplot2) and data-manipulation (dplyr) packages.
library(ggplot2)
library(dplyr)

# NOTE(review): `syphilisall` is not created anywhere in the visible document;
# it must already exist in the session when this chunk runs -- confirm how it
# is loaded.

# Yearly post totals: rows with a missing Year, or with Year equal to 2021,
# are dropped; the remaining rows are counted within each Year.
posts_per_year <- syphilisall %>%
  filter(!is.na(Year), Year != 2021) %>%
  group_by(Year) %>%
  summarise(Number_of_Posts = n())

# Line chart with a point marker on every yearly total.
ggplot(data = posts_per_year, mapping = aes(x = Year, y = Number_of_Posts)) +
  geom_line() +
  geom_point() +
  labs(
    title = "Volume of Syphilis Traffic Over Time",
    x = "Year",
    y = "Number of Posts Across All of Reddit"
  )
```
|
|
|
|
|
|
|
|
```{r}
# Data-quality check on `Year`: number of missing values, then number of
# empty strings.
sum(is.na(syphilisall$Year))

# `Year == ""` evaluates to NA for rows where Year is NA, and sum() over a
# vector containing NA returns NA. na.rm = TRUE drops those entries so the
# empty-string count is always a number.
sum(syphilisall$Year == "", na.rm = TRUE)
```
|
|
|
|
|
|
|
|
```{r}
# Post counts for every (Year, subreddit) pair. Rows with a missing Year, or
# with Year equal to 2021, are left out; the result is returned ungrouped.
posts_per_year <- syphilisall %>%
  filter(!is.na(Year), Year != 2021) %>%
  group_by(Year, subreddit) %>%
  summarise(Number_of_Posts = n(), .groups = "drop")

# One coloured line (plus point markers) per subreddit. The legend is turned
# off via theme(legend.position = "none").
ggplot(
  data = posts_per_year,
  mapping = aes(x = Year, y = Number_of_Posts, color = subreddit)
) +
  geom_line(mapping = aes(group = subreddit)) +
  geom_point() +
  theme(legend.position = "none") +
  labs(
    title = "Volume of Syphilis Traffic Over Time",
    x = "Year",
    y = "Number of Posts per Subreddit"
  )
```
|
|
|
|
```{r}
# Distinct values found in the `subreddit` column, kept in `unique_values`,
# followed by how many there are. unique() treats NA as a value of its own,
# so a missing subreddit (if any) adds one to the count.
unique_values <- unique(syphilisall[["subreddit"]])

length(unique_values)
```
|
|
|
|
```{r}
# Total posts per subreddit (rows with a missing Year or Year equal to 2021
# excluded), reduced to the ten largest totals.
# slice_max() replaces the superseded top_n(). with_ties = TRUE keeps the
# top_n() behaviour of retaining every subreddit tied at the cut-off, so more
# than ten rows can be returned.
top_subreddits <- syphilisall %>%
  filter(!is.na(Year), Year != 2021) %>%
  group_by(subreddit) %>%
  summarise(total_posts = n(), .groups = "drop") %>%
  slice_max(total_posts, n = 10, with_ties = TRUE)

# Yearly post counts restricted to the subreddits selected above, with the
# same Year filter applied.
posts_per_year <- syphilisall %>%
  filter(
    subreddit %in% top_subreddits$subreddit,
    !is.na(Year),
    Year != 2021
  ) %>%
  group_by(Year, subreddit) %>%
  summarise(Number_of_Posts = n(), .groups = "drop")

# One coloured line (plus point markers) per selected subreddit.
ggplot(posts_per_year, aes(x = Year, y = Number_of_Posts, color = subreddit)) +
  geom_line(aes(group = subreddit)) +
  geom_point() +
  labs(
    title = "Volume of Syphilis Traffic Over Time",
    x = "Year",
    y = "Number of Posts per Subreddit"
  )
```
|
|
|
|
|
|
```{r}
# Data-quality check on `subreddit`: number of missing values, then number of
# empty strings.
sum(is.na(syphilisall$subreddit))

# `subreddit == ""` evaluates to NA for rows where subreddit is NA, and sum()
# over a vector containing NA returns NA. na.rm = TRUE drops those entries so
# the empty-string count is always a number.
sum(syphilisall$subreddit == "", na.rm = TRUE)
```
|
|
|
|
|
|
```{r}
# Total posts per subreddit, reduced to the ten largest totals. Rows with a
# missing Year, Year equal to 2021, or an empty subreddit name are excluded;
# filter() also drops rows where `subreddit != ""` is NA, i.e. rows whose
# subreddit is missing.
# slice_max() replaces the superseded top_n(). with_ties = TRUE keeps the
# top_n() behaviour of retaining every subreddit tied at the cut-off, so more
# than ten rows can be returned.
top_subreddits <- syphilisall %>%
  filter(!is.na(Year), Year != 2021, subreddit != "") %>%
  group_by(subreddit) %>%
  summarise(total_posts = n(), .groups = "drop") %>%
  slice_max(total_posts, n = 10, with_ties = TRUE)

# Yearly post counts restricted to the subreddits selected above, with the
# same Year filter applied.
posts_per_year <- syphilisall %>%
  filter(
    subreddit %in% top_subreddits$subreddit,
    !is.na(Year),
    Year != 2021
  ) %>%
  group_by(Year, subreddit) %>%
  summarise(Number_of_Posts = n(), .groups = "drop")

# One coloured line (plus point markers) per selected subreddit.
ggplot(posts_per_year, aes(x = Year, y = Number_of_Posts, color = subreddit)) +
  geom_line(aes(group = subreddit)) +
  geom_point() +
  labs(
    title = "Volume of Syphilis Traffic Over Time in Top Ten Subreddits",
    x = "Year",
    y = "Number of Posts per Subreddit"
  )
```
|
|
|
|
|
|
```{r}
# NOTE(review): `syphilistotal` is not created anywhere in the visible
# document; it must already exist in the session -- confirm how it is loaded.

# Total posts per subreddit in `syphilistotal` (rows with a missing Year or
# Year equal to 2021 excluded), reduced to the ten largest totals.
# slice_max() replaces the superseded top_n(). with_ties = TRUE keeps the
# top_n() behaviour of retaining every subreddit tied at the cut-off, so more
# than ten rows can be returned.
top_subreddits <- syphilistotal %>%
  filter(!is.na(Year), Year != 2021) %>%
  group_by(subreddit) %>%
  summarise(total_posts = n(), .groups = "drop") %>%
  slice_max(total_posts, n = 10, with_ties = TRUE)

# Yearly post counts restricted to the subreddits selected above, with the
# same Year filter applied.
posts_per_year <- syphilistotal %>%
  filter(
    subreddit %in% top_subreddits$subreddit,
    !is.na(Year),
    Year != 2021
  ) %>%
  group_by(Year, subreddit) %>%
  summarise(Number_of_Posts = n(), .groups = "drop")

# One coloured line (plus point markers) per selected subreddit.
ggplot(posts_per_year, aes(x = Year, y = Number_of_Posts, color = subreddit)) +
  geom_line(aes(group = subreddit)) +
  geom_point() +
  labs(
    title = "Volume of Syphilis Traffic Over Time",
    x = "Year",
    y = "Number of Posts per Subreddit"
  )
```
|
|
|
|
|