diff --git a/old_folder/full_model.png b/old_folder/full_model.png deleted file mode 100644 index 7b14461..0000000 Binary files a/old_folder/full_model.png and /dev/null differ diff --git a/old_folder/h1_syphilis_overtime.png b/old_folder/h1_syphilis_overtime.png deleted file mode 100644 index d657376..0000000 Binary files a/old_folder/h1_syphilis_overtime.png and /dev/null differ diff --git a/old_folder/h2_syphilis_top_ten.png b/old_folder/h2_syphilis_top_ten.png deleted file mode 100644 index 06a2090..0000000 Binary files a/old_folder/h2_syphilis_top_ten.png and /dev/null differ diff --git a/old_folder/interpolation_function.R b/old_folder/interpolation_function.R deleted file mode 100644 index 53f8eaf..0000000 --- a/old_folder/interpolation_function.R +++ /dev/null @@ -1,58 +0,0 @@ -setwd("~/Desktop/cdsc/health literacy/") - -best = read.csv("Best_Data.csv") - -# subsetting desired years - -years = c("2007", "2010", "2013", "2016") - -best_subsetted = best[best$Year %in% years,] - -# creating empty rows for interpolation + interpolating the mean (stigma) scores - -expand = function(df){ - total_years = seq(min(df$Year), max(df$Year)) - expanded_df = data.frame(Year = total_years) - unique_diseases = unique(df$Reconciled_Name) - expanded_data = data.frame() - - for (disease in unique_diseases){ - disease_data = df[df$Reconciled_Name == disease, ] - expanded_disease_data = merge(expanded_df, disease_data, by = "Year", all.x = TRUE) - expanded_disease_data$Reconciled_Name = disease - expanded_data = rbind(expanded_data, expanded_disease_data) - } - - for (disease in unique_diseases) { - disease_data = expanded_data[expanded_data$Reconciled_Name == disease, ] - known_year = which(!is.na(disease_data$mean)) - # sort known_year it by year - - for (i in 1:(length(known_year) - 1)) { - before_year = known_year[i] - after_year = known_year[i + 1] - if (is.na(after_year)){ - print("break") - break - } - start_mean = disease_data$mean[before_year] - end_mean = disease_data$mean[after_year] - num_missing = after_year - before_year - 1 - print(after_year) - - if (num_missing > 0) { - increment = (end_mean - start_mean) / (num_missing + 1) - - for (j in 1:num_missing) { - disease_data$mean[before_year + j] = start_mean + increment * j - } - } - } - - expanded_data[expanded_data$Reconciled_Name == disease, ] = disease_data - } - - return(expanded_data) -} - -best_interpolated = expand(best_subsetted) diff --git a/old_folder/map_and_merge.R b/old_folder/map_and_merge.R deleted file mode 100644 index 05d0f9a..0000000 --- a/old_folder/map_and_merge.R +++ /dev/null @@ -1,19 +0,0 @@ -setwd("~/Desktop/cdsc/health literacy/") - -mapping = read.delim("Mapping - Sheet1.tsv") -mapping <- mapping[,colnames(mapping) != "Notes"] - -IHME = read.csv("IHME_Data.csv") -best <- read.csv("Best_Data.csv") - -# first, create the list of all the one-to-ones -d <- merge(best, mapping[!is.na(mapping$Mapping_Type) & mapping$Mapping_Type == "one-to-one",], by.x="Reconciled_Name", by.y="Best_Disease_Name", all=TRUE) - - -# now merge the imhe data on -d <- merge(d, IHME[IHME$measure_name == "DALYs (Disability-Adjusted Life Years)", - c("cause_id", "measure_name", "year", "val")], - by.x=c("IHME_Cause_ID", "Year"), - by.y=c("cause_id", "year")) -# next, handle the one-to-manys but summing up - diff --git a/old_folder/revised_model.png b/old_folder/revised_model.png deleted file mode 100644 index cbb5fed..0000000 Binary files a/old_folder/revised_model.png and /dev/null differ diff --git a/old_folder/syphilis_traffic.Rmd b/old_folder/syphilis_traffic.Rmd deleted file mode 100644 index ebbb0cc..0000000 --- a/old_folder/syphilis_traffic.Rmd +++ /dev/null @@ -1,142 +0,0 @@ ---- -title: "syphilis traffic" -output: html_document -date: "2024-11-19" ---- - -```{r} - -library(ggplot2) -library(dplyr) - -posts_per_year = syphilisall %>% - filter(!is.na(Year) & Year != 2021) %>% - group_by(Year) %>% - summarise(Number_of_Posts = n()) - -ggplot(posts_per_year, aes(x = Year, y = Number_of_Posts)) + - geom_line() + - geom_point() + - labs(title = "Volume of Syphilis Traffic Over Time", - x = "Year", - y = "Number of Posts Across All of Reddit") - - -``` - - - -```{r} - -sum(is.na(syphilisall$Year)) -sum(syphilisall$Year == "") - -``` - - - -```{r} - -posts_per_year = syphilisall %>% - filter(!is.na(Year) & Year != 2021) %>% # Exclude NA values and 2021 - group_by(Year, subreddit) %>% # Group by both 'Year' and 'subreddit' - summarise(Number_of_Posts = n(), .groups = "drop") # Count the number of posts per subreddit and year - -# Plotting the result -ggplot(posts_per_year, aes(x = Year, y = Number_of_Posts, color = subreddit)) + - geom_line(aes(group = subreddit)) + # Draw a line for each subreddit - geom_point() + # Add points for each year/subreddit combination - theme(legend.position="none") + - labs(title = "Volume of Syphilis Traffic Over Time", - x = "Year", - y = "Number of Posts per Subreddit") - -``` - -```{r} - -unique_values = unique(syphilisall$subreddit) - -length(unique_values) - -``` - -```{r} - -top_subreddits = syphilisall %>% - filter(!is.na(Year) & Year != 2021) %>% - group_by(subreddit) %>% - summarise(total_posts = n(), .groups = "drop") %>% - top_n(10, total_posts) - -posts_per_year = syphilisall %>% - filter(subreddit %in% top_subreddits$subreddit, !is.na(Year) & Year != 2021) %>% - group_by(Year, subreddit) %>% - summarise(Number_of_Posts = n(), .groups = "drop") - -ggplot(posts_per_year, aes(x = Year, y = Number_of_Posts, color = subreddit)) + - geom_line(aes(group = subreddit)) + - geom_point() + - labs(title = "Volume of Syphilis Traffic Over Time", - x = "Year", - y = "Number of Posts per Subreddit") - -``` - - -```{r} - -sum(is.na(syphilisall$subreddit)) - -sum(syphilisall$subreddit == "") - - -``` - - -```{r} - -top_subreddits = syphilisall %>% - filter(!is.na(Year) & Year != 2021, subreddit != "") %>% - group_by(subreddit) %>% - summarise(total_posts = n(), .groups = "drop") %>% - top_n(10, total_posts) - -posts_per_year = syphilisall %>% - filter(subreddit %in% top_subreddits$subreddit, !is.na(Year) & Year != 2021) %>% - group_by(Year, subreddit) %>% - summarise(Number_of_Posts = n(), .groups = "drop") - -ggplot(posts_per_year, aes(x = Year, y = Number_of_Posts, color = subreddit)) + - geom_line(aes(group = subreddit)) + - geom_point() + - labs(title = "Volume of Syphilis Traffic Over Time in Top Ten Subreddits", - x = "Year", - y = "Number of Posts per Subreddit") - -``` - - -```{r} - -top_subreddits = syphilistotal %>% - filter(!is.na(Year) & Year != 2021) %>% - group_by(subreddit) %>% - summarise(total_posts = n(), .groups = "drop") %>% - top_n(10, total_posts) - -posts_per_year = syphilistotal %>% - filter(subreddit %in% top_subreddits$subreddit, !is.na(Year) & Year != 2021) %>% - group_by(Year, subreddit) %>% - summarise(Number_of_Posts = n(), .groups = "drop") - -ggplot(posts_per_year, aes(x = Year, y = Number_of_Posts, color = subreddit)) + - geom_line(aes(group = subreddit)) + - geom_point() + - labs(title = "Volume of Syphilis Traffic Over Time", - x = "Year", - y = "Number of Posts per Subreddit") - -``` - -