---
title: "syphilis traffic"
output: html_document
date: "2024-11-19"
---

```{r}
library(ggplot2)
library(dplyr)

# NOTE(review): `syphilisall` (and `syphilistotal`, used in the last chunk) are
# not created anywhere in this document. They must be loaded before this point
# for the document to knit in a fresh session -- confirm where they come from.

# Total posts per year across every subreddit. Rows with a missing Year and
# the year 2021 are excluded (presumably 2021 is incomplete -- TODO confirm).
posts_per_year <- syphilisall %>%
  filter(!is.na(Year), Year != 2021) %>%
  count(Year, name = "Number_of_Posts")

ggplot(posts_per_year, aes(x = Year, y = Number_of_Posts)) +
  geom_line() +
  geom_point() +
  labs(
    title = "Volume of Syphilis Traffic Over Time",
    x = "Year",
    y = "Number of Posts Across All of Reddit"
  )
```

```{r}
# Data-quality check on Year: how many values are missing, how many are blank.
# na.rm = TRUE is required for the blank count; `NA == ""` is NA, so without
# it a single missing Year would turn the whole sum into NA.
sum(is.na(syphilisall$Year))
sum(syphilisall$Year == "", na.rm = TRUE)
```

```{r}
# Posts per year for every subreddit (one row per Year/subreddit pair).
posts_per_year <- syphilisall %>%
  filter(!is.na(Year), Year != 2021) %>%
  count(Year, subreddit, name = "Number_of_Posts")

# One line per subreddit; the legend is hidden because there are too many
# subreddits for it to be readable.
ggplot(
  posts_per_year,
  aes(x = Year, y = Number_of_Posts, color = subreddit)
) +
  geom_line(aes(group = subreddit)) +
  geom_point() +
  theme(legend.position = "none") +
  labs(
    title = "Volume of Syphilis Traffic Over Time",
    x = "Year",
    y = "Number of Posts per Subreddit"
  )
```

```{r}
# Number of distinct subreddit values (NA and "" each count as a value).
unique_values <- unique(syphilisall$subreddit)
length(unique_values)
```

```{r}
# Ten subreddits with the most posts overall (ties at the cutoff are kept).
# Blank subreddit names are NOT removed here; see the later chunk that does.
top_subreddits <- syphilisall %>%
  filter(!is.na(Year), Year != 2021) %>%
  count(subreddit, name = "total_posts") %>%
  slice_max(total_posts, n = 10)

# Yearly post counts restricted to those top subreddits.
posts_per_year <- syphilisall %>%
  filter(
    subreddit %in% top_subreddits$subreddit,
    !is.na(Year),
    Year != 2021
  ) %>%
  count(Year, subreddit, name = "Number_of_Posts")

ggplot(
  posts_per_year,
  aes(x = Year, y = Number_of_Posts, color = subreddit)
) +
  geom_line(aes(group = subreddit)) +
  geom_point() +
  labs(
    title = "Volume of Syphilis Traffic Over Time",
    x = "Year",
    y = "Number of Posts per Subreddit"
  )
```

```{r}
# Data-quality check on subreddit: missing vs. blank values.
# na.rm = TRUE keeps the blank count from collapsing to NA when NAs exist.
sum(is.na(syphilisall$subreddit))
sum(syphilisall$subreddit == "", na.rm = TRUE)
```

```{r}
# Same top-ten analysis, but with blank subreddit names removed before
# ranking so that "" cannot occupy one of the ten slots. (filter() also drops
# rows where subreddit is NA, because `NA != ""` is NA.)
top_subreddits <- syphilisall %>%
  filter(!is.na(Year), Year != 2021, subreddit != "") %>%
  count(subreddit, name = "total_posts") %>%
  slice_max(total_posts, n = 10)

posts_per_year <- syphilisall %>%
  filter(
    subreddit %in% top_subreddits$subreddit,
    !is.na(Year),
    Year != 2021
  ) %>%
  count(Year, subreddit, name = "Number_of_Posts")

ggplot(
  posts_per_year,
  aes(x = Year, y = Number_of_Posts, color = subreddit)
) +
  geom_line(aes(group = subreddit)) +
  geom_point() +
  labs(
    title = "Volume of Syphilis Traffic Over Time in Top Ten Subreddits",
    x = "Year",
    y = "Number of Posts per Subreddit"
  )
```

```{r}
# Top-ten analysis repeated on `syphilistotal`.
# NOTE(review): blank subreddit names are not filtered here, unlike the chunk
# above -- confirm whether `syphilistotal` is already cleaned.
top_subreddits <- syphilistotal %>%
  filter(!is.na(Year), Year != 2021) %>%
  count(subreddit, name = "total_posts") %>%
  slice_max(total_posts, n = 10)

posts_per_year <- syphilistotal %>%
  filter(
    subreddit %in% top_subreddits$subreddit,
    !is.na(Year),
    Year != 2021
  ) %>%
  count(Year, subreddit, name = "Number_of_Posts")

ggplot(
  posts_per_year,
  aes(x = Year, y = Number_of_Posts, color = subreddit)
) +
  geom_line(aes(group = subreddit)) +
  geom_point() +
  labs(
    title = "Volume of Syphilis Traffic Over Time",
    x = "Year",
    y = "Number of Posts per Subreddit"
  )
```