uploaded exploratory r code
This commit is contained in:
parent
06ed67015e
commit
cf735b845b
142
exploratory-analysis/syphilis_traffic.Rmd
Normal file
142
exploratory-analysis/syphilis_traffic.Rmd
Normal file
@ -0,0 +1,142 @@
|
||||
---
|
||||
title: "syphilis traffic"
|
||||
output: html_document
|
||||
date: "2024-11-19"
|
||||
---
|
||||
|
||||
```{r}
|
||||
|
||||
library(ggplot2)
|
||||
library(dplyr)
|
||||
|
||||
# Count posts per calendar year across the whole data set.
# Rows with a missing Year are dropped and 2021 is excluded.
# NOTE(review): 2021 is presumably left out because it is an incomplete
# year -- confirm.
posts_per_year <- syphilisall %>%
  filter(!is.na(Year), Year != 2021) %>%
  group_by(Year) %>%
  summarise(Number_of_Posts = n())

# Yearly post volume as a single line with a marker at every year.
# `group = 1` forces one connected line even if Year is stored as a
# character/factor column; with a discrete x, ggplot2 would otherwise put
# each year in its own group and draw no line. It is a no-op for numeric Year.
ggplot(posts_per_year, aes(x = Year, y = Number_of_Posts)) +
  geom_line(aes(group = 1)) +
  geom_point() +
  labs(
    title = "Volume of Syphilis Traffic Over Time",
    x = "Year",
    y = "Number of Posts Across All of Reddit"
  )
|
||||
|
||||
|
||||
```
|
||||
|
||||
|
||||
|
||||
```{r}
|
||||
|
||||
# How many rows have no usable Year?
# Count of missing (NA) values:
sum(is.na(syphilisall$Year))
# Count of empty strings. `==` yields NA for missing entries, so without
# `na.rm = TRUE` a single NA in Year would turn the whole count into NA.
sum(syphilisall$Year == "", na.rm = TRUE)
|
||||
|
||||
```
|
||||
|
||||
|
||||
|
||||
```{r}
|
||||
|
||||
# Yearly post counts broken down by subreddit
# (rows with a missing Year and the year 2021 are removed first).
posts_per_year <- syphilisall %>%
  filter(!is.na(Year), Year != 2021) %>%
  group_by(Year, subreddit) %>%
  summarise(Number_of_Posts = n(), .groups = "drop")

# One coloured line per subreddit, with a point at each year/subreddit pair.
# The legend is hidden.
ggplot(
  posts_per_year,
  aes(x = Year, y = Number_of_Posts, color = subreddit)
) +
  geom_line(aes(group = subreddit)) +
  geom_point() +
  ggtitle("Volume of Syphilis Traffic Over Time") +
  xlab("Year") +
  ylab("Number of Posts per Subreddit") +
  theme(legend.position = "none")
|
||||
|
||||
```
|
||||
|
||||
```{r}
|
||||
|
||||
# Distinct subreddit values present in the data, and how many there are.
unique_values <- unique(syphilisall[["subreddit"]])

length(unique_values)
|
||||
|
||||
```
|
||||
|
||||
```{r}
|
||||
|
||||
# The ten subreddits with the most posts overall (rows with a missing Year
# and the year 2021 excluded). `slice_max()` replaces the superseded
# `top_n()`; like `top_n()` it keeps ties, so more than ten rows can come
# back when counts are tied at the cut-off.
top_subreddits <- syphilisall %>%
  filter(!is.na(Year), Year != 2021) %>%
  group_by(subreddit) %>%
  summarise(total_posts = n(), .groups = "drop") %>%
  slice_max(total_posts, n = 10)

# Yearly post counts restricted to those top subreddits.
posts_per_year <- syphilisall %>%
  filter(
    subreddit %in% top_subreddits$subreddit,
    !is.na(Year),
    Year != 2021
  ) %>%
  group_by(Year, subreddit) %>%
  summarise(Number_of_Posts = n(), .groups = "drop")

# One coloured line per top subreddit, with a point at each year.
ggplot(
  posts_per_year,
  aes(x = Year, y = Number_of_Posts, color = subreddit)
) +
  geom_line(aes(group = subreddit)) +
  geom_point() +
  labs(
    title = "Volume of Syphilis Traffic Over Time",
    x = "Year",
    y = "Number of Posts per Subreddit"
  )
|
||||
|
||||
```
|
||||
|
||||
|
||||
```{r}
|
||||
|
||||
# How many rows have no usable subreddit?
# Count of missing (NA) values:
sum(is.na(syphilisall$subreddit))

# Count of empty strings. `==` yields NA for missing entries, so without
# `na.rm = TRUE` a single NA in subreddit would turn the whole count into NA.
sum(syphilisall$subreddit == "", na.rm = TRUE)
|
||||
|
||||
|
||||
```
|
||||
|
||||
|
||||
```{r}
|
||||
|
||||
# The ten subreddits with the most posts, this time ignoring rows whose
# subreddit is the empty string (rows with a missing Year and the year 2021
# are excluded as well). `slice_max()` replaces the superseded `top_n()`;
# like `top_n()` it keeps ties, so more than ten rows can come back when
# counts are tied at the cut-off.
top_subreddits <- syphilisall %>%
  filter(!is.na(Year), Year != 2021, subreddit != "") %>%
  group_by(subreddit) %>%
  summarise(total_posts = n(), .groups = "drop") %>%
  slice_max(total_posts, n = 10)

# Yearly post counts restricted to those top subreddits. Blank subreddits
# cannot reappear here because they are not in `top_subreddits`.
posts_per_year <- syphilisall %>%
  filter(
    subreddit %in% top_subreddits$subreddit,
    !is.na(Year),
    Year != 2021
  ) %>%
  group_by(Year, subreddit) %>%
  summarise(Number_of_Posts = n(), .groups = "drop")

# One coloured line per top subreddit, with a point at each year.
ggplot(
  posts_per_year,
  aes(x = Year, y = Number_of_Posts, color = subreddit)
) +
  geom_line(aes(group = subreddit)) +
  geom_point() +
  labs(
    title = "Volume of Syphilis Traffic Over Time in Top Ten Subreddits",
    x = "Year",
    y = "Number of Posts per Subreddit"
  )
|
||||
|
||||
```
|
||||
|
||||
|
||||
```{r}
|
||||
|
||||
# Same top-ten analysis, run on `syphilistotal` instead of `syphilisall`.
# NOTE(review): how `syphilistotal` differs from `syphilisall` is not visible
# in this file -- confirm before comparing the two plots.
# `slice_max()` replaces the superseded `top_n()`; like `top_n()` it keeps
# ties, so more than ten rows can come back when counts are tied.
top_subreddits <- syphilistotal %>%
  filter(!is.na(Year), Year != 2021) %>%
  group_by(subreddit) %>%
  summarise(total_posts = n(), .groups = "drop") %>%
  slice_max(total_posts, n = 10)

# Yearly post counts restricted to those top subreddits.
posts_per_year <- syphilistotal %>%
  filter(
    subreddit %in% top_subreddits$subreddit,
    !is.na(Year),
    Year != 2021
  ) %>%
  group_by(Year, subreddit) %>%
  summarise(Number_of_Posts = n(), .groups = "drop")

# One coloured line per top subreddit, with a point at each year.
ggplot(
  posts_per_year,
  aes(x = Year, y = Number_of_Posts, color = subreddit)
) +
  geom_line(aes(group = subreddit)) +
  geom_point() +
  labs(
    title = "Volume of Syphilis Traffic Over Time",
    x = "Year",
    y = "Number of Posts per Subreddit"
  )
|
||||
|
||||
```
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user