1
0
stigma-reddit/exploratory-analysis/syphilis_traffic.Rmd

143 lines
3.4 KiB
Plaintext

---
title: "syphilis traffic"
output: html_document
date: "2024-11-19"
---
```{r}
library(ggplot2)
library(dplyr)
# Yearly post counts over the whole dataset. Rows with a missing Year and
# every row from 2021 are left out before counting.
posts_per_year <- syphilisall %>%
  filter(!is.na(Year), Year != 2021) %>%
  group_by(Year) %>%
  summarise(Number_of_Posts = n())

# Line chart of the yearly totals, with a marker on each year.
ggplot(data = posts_per_year, mapping = aes(x = Year, y = Number_of_Posts)) +
  geom_line() +
  geom_point() +
  ggtitle("Volume of Syphilis Traffic Over Time") +
  xlab("Year") +
  ylab("Number of Posts Across All of Reddit")
```
```{r}
# Data-quality check on Year: number of missing (NA) entries, then number of
# empty-string entries.
sum(is.na(syphilisall$Year))
# `==` yields NA for missing entries, so without na.rm = TRUE a single NA in
# Year would turn the whole blank count into NA.
sum(syphilisall$Year == "", na.rm = TRUE)
```
```{r}
# Post counts for every (Year, subreddit) pair, skipping rows without a Year
# and anything from 2021. The result is returned ungrouped.
posts_per_year <- syphilisall %>%
  filter(!is.na(Year), Year != 2021) %>%
  group_by(Year, subreddit) %>%
  summarise(Number_of_Posts = n(), .groups = "drop")

# One coloured line (plus markers) per subreddit; the legend is hidden.
ggplot(
  data = posts_per_year,
  mapping = aes(x = Year, y = Number_of_Posts, color = subreddit)
) +
  geom_line(mapping = aes(group = subreddit)) +
  geom_point() +
  labs(
    title = "Volume of Syphilis Traffic Over Time",
    x = "Year",
    y = "Number of Posts per Subreddit"
  ) +
  theme(legend.position = "none")
```
```{r}
# Distinct values of the subreddit column, followed by how many there are.
unique_values <- unique(syphilisall[["subreddit"]])
length(unique_values)
```
```{r}
# Ten subreddits with the most posts, counted over rows that have a Year
# other than 2021. Ties at the cutoff are kept, so more than ten rows are
# possible. slice_max() replaces top_n(), which dplyr has superseded.
# NOTE: blank subreddit names are not excluded in this chunk.
top_subreddits <- syphilisall %>%
  filter(!is.na(Year), Year != 2021) %>%
  group_by(subreddit) %>%
  summarise(total_posts = n(), .groups = "drop") %>%
  slice_max(total_posts, n = 10)

# Yearly post counts restricted to those top subreddits.
posts_per_year <- syphilisall %>%
  filter(
    subreddit %in% top_subreddits$subreddit,
    !is.na(Year),
    Year != 2021
  ) %>%
  group_by(Year, subreddit) %>%
  summarise(Number_of_Posts = n(), .groups = "drop")

# One coloured line (plus markers) per subreddit.
ggplot(posts_per_year, aes(x = Year, y = Number_of_Posts, color = subreddit)) +
  geom_line(aes(group = subreddit)) +
  geom_point() +
  labs(
    title = "Volume of Syphilis Traffic Over Time",
    x = "Year",
    y = "Number of Posts per Subreddit"
  )
```
```{r}
# Data-quality check on subreddit: number of missing (NA) entries, then
# number of empty-string entries.
sum(is.na(syphilisall$subreddit))
# `==` yields NA for missing entries, so without na.rm = TRUE a single NA in
# subreddit would turn the whole blank count into NA.
sum(syphilisall$subreddit == "", na.rm = TRUE)
```
```{r}
# Ten subreddits with the most posts, counted over rows that have a Year
# other than 2021 and a non-blank subreddit name. Ties at the cutoff are
# kept, so more than ten rows are possible. slice_max() replaces top_n(),
# which dplyr has superseded.
top_subreddits <- syphilisall %>%
  filter(!is.na(Year), Year != 2021, subreddit != "") %>%
  group_by(subreddit) %>%
  summarise(total_posts = n(), .groups = "drop") %>%
  slice_max(total_posts, n = 10)

# Yearly post counts restricted to those top subreddits.
posts_per_year <- syphilisall %>%
  filter(
    subreddit %in% top_subreddits$subreddit,
    !is.na(Year),
    Year != 2021
  ) %>%
  group_by(Year, subreddit) %>%
  summarise(Number_of_Posts = n(), .groups = "drop")

# One coloured line (plus markers) per subreddit.
ggplot(posts_per_year, aes(x = Year, y = Number_of_Posts, color = subreddit)) +
  geom_line(aes(group = subreddit)) +
  geom_point() +
  labs(
    title = "Volume of Syphilis Traffic Over Time in Top Ten Subreddits",
    x = "Year",
    y = "Number of Posts per Subreddit"
  )
```
```{r}
# Same top-ten analysis, run on syphilistotal instead of syphilisall.
# Ten subreddits with the most posts, counted over rows that have a Year
# other than 2021. Blank subreddit names are dropped, as in the corrected
# syphilisall top-ten chunk, so an empty name cannot take a top-ten slot.
# Ties at the cutoff are kept. slice_max() replaces top_n(), which dplyr has
# superseded.
top_subreddits <- syphilistotal %>%
  filter(!is.na(Year), Year != 2021, subreddit != "") %>%
  group_by(subreddit) %>%
  summarise(total_posts = n(), .groups = "drop") %>%
  slice_max(total_posts, n = 10)

# Yearly post counts restricted to those top subreddits.
posts_per_year <- syphilistotal %>%
  filter(
    subreddit %in% top_subreddits$subreddit,
    !is.na(Year),
    Year != 2021
  ) %>%
  group_by(Year, subreddit) %>%
  summarise(Number_of_Posts = n(), .groups = "drop")

# One coloured line (plus markers) per subreddit.
ggplot(posts_per_year, aes(x = Year, y = Number_of_Posts, color = subreddit)) +
  geom_line(aes(group = subreddit)) +
  geom_point() +
  labs(
    title = "Volume of Syphilis Traffic Over Time",
    x = "Year",
    y = "Number of Posts per Subreddit"
  )
```