deleted files already in repository
This commit is contained in:
parent
cd55f5bf69
commit
855fa49611
Binary file not shown.
Before Width: | Height: | Size: 139 KiB |
Binary file not shown.
Before Width: | Height: | Size: 115 KiB |
Binary file not shown.
Before Width: | Height: | Size: 366 KiB |
@ -1,58 +0,0 @@
|
|||||||
setwd("~/Desktop/cdsc/health literacy/")
|
|
||||||
|
|
||||||
best = read.csv("Best_Data.csv")
|
|
||||||
|
|
||||||
# subsetting desired years
|
|
||||||
|
|
||||||
years = c("2007", "2010", "2013", "2016")
|
|
||||||
|
|
||||||
best_subsetted = best[best$Year %in% years,]
|
|
||||||
|
|
||||||
# creating empty rows for interpolation + interpolating the mean (stigma) scores
|
|
||||||
|
|
||||||
# Expand each disease's records onto a continuous range of years and linearly
# interpolate the `mean` column between observed values.
#
# Args:
#   df: data frame with at least the columns `Year` (numeric),
#       `Reconciled_Name` and `mean` (numeric).
#
# Returns:
#   A data frame holding, for every non-missing `Reconciled_Name`, the rows of
#   `df` for that disease merged onto every year from min(df$Year) to
#   max(df$Year). Years without a record get a row whose other columns are NA.
#   Gaps in `mean` that lie between two observed values are filled by linear
#   interpolation; leading and trailing gaps stay NA.
expand <- function(df) {
  required <- c("Year", "Reconciled_Name", "mean")
  absent <- setdiff(required, names(df))
  if (length(absent) > 0) {
    stop("expand(): missing column(s): ", paste(absent, collapse = ", "),
         call. = FALSE)
  }
  # Also covers a zero-row input, since all(logical(0)) is TRUE.
  if (all(is.na(df$Year))) {
    stop("expand(): `Year` has no non-missing values", call. = FALSE)
  }

  year_grid <- data.frame(
    Year = seq(min(df$Year, na.rm = TRUE), max(df$Year, na.rm = TRUE))
  )

  # Rows without a disease name cannot be assigned to a series; drop them
  # instead of letting NA leak into the logical masks below.
  diseases <- as.character(unique(df$Reconciled_Name))
  diseases <- diseases[!is.na(diseases)]

  expand_one <- function(disease) {
    # %in% never yields NA, so rows with a missing name are simply excluded.
    rows <- df[df$Reconciled_Name %in% disease, , drop = FALSE]
    block <- merge(year_grid, rows, by = "Year", all.x = TRUE)
    block$Reconciled_Name <- disease

    # merge() returns rows sorted by Year, so row positions are in year order.
    known <- which(!is.na(block$mean))
    # Interpolation needs two anchors; with fewer there is nothing to fill.
    if (length(known) >= 2) {
      block$mean <- stats::approx(
        x = known,
        y = block$mean[known],
        xout = seq_len(nrow(block))
      )$y
    }
    block
  }

  pieces <- lapply(diseases, expand_one)
  if (length(pieces) == 0) {
    return(data.frame())
  }
  do.call(rbind, pieces)
}
|
|
||||||
|
|
||||||
best_interpolated = expand(best_subsetted)
|
|
@ -1,19 +0,0 @@
|
|||||||
setwd("~/Desktop/cdsc/health literacy/")
|
|
||||||
|
|
||||||
mapping = read.delim("Mapping - Sheet1.tsv")
|
|
||||||
mapping <- mapping[,colnames(mapping) != "Notes"]
|
|
||||||
|
|
||||||
IHME = read.csv("IHME_Data.csv")
|
|
||||||
best <- read.csv("Best_Data.csv")
|
|
||||||
|
|
||||||
# first, create the list of all the one-to-ones
|
|
||||||
d <- merge(best, mapping[!is.na(mapping$Mapping_Type) & mapping$Mapping_Type == "one-to-one",], by.x="Reconciled_Name", by.y="Best_Disease_Name", all=TRUE)
|
|
||||||
|
|
||||||
|
|
||||||
# now merge the IHME data on
|
|
||||||
d <- merge(d, IHME[IHME$measure_name == "DALYs (Disability-Adjusted Life Years)",
|
|
||||||
c("cause_id", "measure_name", "year", "val")],
|
|
||||||
by.x=c("IHME_Cause_ID", "Year"),
|
|
||||||
by.y=c("cause_id", "year"))
|
|
||||||
# next, handle the one-to-manys by summing up
|
|
||||||
|
|
Binary file not shown.
Before Width: | Height: | Size: 355 KiB |
@ -1,142 +0,0 @@
|
|||||||
---
|
|
||||||
title: "syphilis traffic"
|
|
||||||
output: html_document
|
|
||||||
date: "2024-11-19"
|
|
||||||
---
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
|
|
||||||
library(ggplot2)
|
|
||||||
library(dplyr)
|
|
||||||
|
|
||||||
posts_per_year = syphilisall %>%
|
|
||||||
filter(!is.na(Year) & Year != 2021) %>%
|
|
||||||
group_by(Year) %>%
|
|
||||||
summarise(Number_of_Posts = n())
|
|
||||||
|
|
||||||
ggplot(posts_per_year, aes(x = Year, y = Number_of_Posts)) +
|
|
||||||
geom_line() +
|
|
||||||
geom_point() +
|
|
||||||
labs(title = "Volume of Syphilis Traffic Over Time",
|
|
||||||
x = "Year",
|
|
||||||
y = "Number of Posts Across All of Reddit")
|
|
||||||
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
|
|
||||||
sum(is.na(syphilisall$Year))
|
|
||||||
# Count rows whose Year is an empty string. NA years are counted separately
# just above, so skip them here; otherwise a single NA turns the sum into NA.
sum(syphilisall$Year == "", na.rm = TRUE)
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
|
|
||||||
posts_per_year = syphilisall %>%
|
|
||||||
filter(!is.na(Year) & Year != 2021) %>% # Exclude NA values and 2021
|
|
||||||
group_by(Year, subreddit) %>% # Group by both 'Year' and 'subreddit'
|
|
||||||
summarise(Number_of_Posts = n(), .groups = "drop") # Count the number of posts per subreddit and year
|
|
||||||
|
|
||||||
# Plotting the result
|
|
||||||
ggplot(posts_per_year, aes(x = Year, y = Number_of_Posts, color = subreddit)) +
|
|
||||||
geom_line(aes(group = subreddit)) + # Draw a line for each subreddit
|
|
||||||
geom_point() + # Add points for each year/subreddit combination
|
|
||||||
theme(legend.position="none") +
|
|
||||||
labs(title = "Volume of Syphilis Traffic Over Time",
|
|
||||||
x = "Year",
|
|
||||||
y = "Number of Posts per Subreddit")
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
|
|
||||||
unique_values = unique(syphilisall$subreddit)
|
|
||||||
|
|
||||||
length(unique_values)
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
|
|
||||||
top_subreddits = syphilisall %>%
|
|
||||||
filter(!is.na(Year) & Year != 2021) %>%
|
|
||||||
group_by(subreddit) %>%
|
|
||||||
summarise(total_posts = n(), .groups = "drop") %>%
|
|
||||||
top_n(10, total_posts)
|
|
||||||
|
|
||||||
posts_per_year = syphilisall %>%
|
|
||||||
filter(subreddit %in% top_subreddits$subreddit, !is.na(Year) & Year != 2021) %>%
|
|
||||||
group_by(Year, subreddit) %>%
|
|
||||||
summarise(Number_of_Posts = n(), .groups = "drop")
|
|
||||||
|
|
||||||
ggplot(posts_per_year, aes(x = Year, y = Number_of_Posts, color = subreddit)) +
|
|
||||||
geom_line(aes(group = subreddit)) +
|
|
||||||
geom_point() +
|
|
||||||
labs(title = "Volume of Syphilis Traffic Over Time",
|
|
||||||
x = "Year",
|
|
||||||
y = "Number of Posts per Subreddit")
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
|
|
||||||
sum(is.na(syphilisall$subreddit))
|
|
||||||
|
|
||||||
# Count rows whose subreddit is an empty string. NA values are counted
# separately just above, so skip them here; otherwise a single NA turns the
# sum into NA.
sum(syphilisall$subreddit == "", na.rm = TRUE)
|
|
||||||
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
|
|
||||||
top_subreddits = syphilisall %>%
|
|
||||||
filter(!is.na(Year) & Year != 2021, subreddit != "") %>%
|
|
||||||
group_by(subreddit) %>%
|
|
||||||
summarise(total_posts = n(), .groups = "drop") %>%
|
|
||||||
top_n(10, total_posts)
|
|
||||||
|
|
||||||
posts_per_year = syphilisall %>%
|
|
||||||
filter(subreddit %in% top_subreddits$subreddit, !is.na(Year) & Year != 2021) %>%
|
|
||||||
group_by(Year, subreddit) %>%
|
|
||||||
summarise(Number_of_Posts = n(), .groups = "drop")
|
|
||||||
|
|
||||||
ggplot(posts_per_year, aes(x = Year, y = Number_of_Posts, color = subreddit)) +
|
|
||||||
geom_line(aes(group = subreddit)) +
|
|
||||||
geom_point() +
|
|
||||||
labs(title = "Volume of Syphilis Traffic Over Time in Top Ten Subreddits",
|
|
||||||
x = "Year",
|
|
||||||
y = "Number of Posts per Subreddit")
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
|
|
||||||
top_subreddits = syphilistotal %>%
|
|
||||||
filter(!is.na(Year) & Year != 2021) %>%
|
|
||||||
group_by(subreddit) %>%
|
|
||||||
summarise(total_posts = n(), .groups = "drop") %>%
|
|
||||||
top_n(10, total_posts)
|
|
||||||
|
|
||||||
posts_per_year = syphilistotal %>%
|
|
||||||
filter(subreddit %in% top_subreddits$subreddit, !is.na(Year) & Year != 2021) %>%
|
|
||||||
group_by(Year, subreddit) %>%
|
|
||||||
summarise(Number_of_Posts = n(), .groups = "drop")
|
|
||||||
|
|
||||||
ggplot(posts_per_year, aes(x = Year, y = Number_of_Posts, color = subreddit)) +
|
|
||||||
geom_line(aes(group = subreddit)) +
|
|
||||||
geom_point() +
|
|
||||||
labs(title = "Volume of Syphilis Traffic Over Time",
|
|
||||||
x = "Year",
|
|
||||||
y = "Number of Posts per Subreddit")
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user