deleted files already in repository

2025-02-27 13:34:16 -08:00 · 2025-02-27 13:34:16 -08:00 · 855fa49611
commit 855fa49611
parent cd55f5bf69
7 changed files with 0 additions and 219 deletions
--- a/old_folder/full_model.png
+++ b/old_folder/full_model.png
--- a/old_folder/h1_syphilis_overtime.png
+++ b/old_folder/h1_syphilis_overtime.png
--- a/old_folder/h2_syphilis_top_ten.png
+++ b/old_folder/h2_syphilis_top_ten.png
--- a/old_folder/interpolation_function.R
+++ b/old_folder/interpolation_function.R
@ -1,58 +0,0 @@
-setwd("~/Desktop/cdsc/health literacy/")
-
-best = read.csv("Best_Data.csv")
-
-# subsetting desired years
-
-years = c("2007", "2010", "2013", "2016")
-
-best_subsetted = best[best$Year %in% years,]
-
-# creating empty rows for interpolation + interpolating the mean (stigma) scores 
-
-expand = function(df){
-  total_years = seq(min(df$Year), max(df$Year))
-  expanded_df = data.frame(Year = total_years)
-  unique_diseases = unique(df$Reconciled_Name)
-  expanded_data = data.frame()
-  
-  for (disease in unique_diseases){
-    disease_data = df[df$Reconciled_Name == disease, ]
-    expanded_disease_data = merge(expanded_df, disease_data, by = "Year", all.x = TRUE)
-    expanded_disease_data$Reconciled_Name = disease
-    expanded_data = rbind(expanded_data, expanded_disease_data)
-  }
-  
-  for (disease in unique_diseases) {
-    disease_data = expanded_data[expanded_data$Reconciled_Name == disease, ] 
-    known_year = which(!is.na(disease_data$mean))
-    # sort known_year it by year
-    
-    for (i in 1:(length(known_year) - 1)) {
-      before_year = known_year[i]
-      after_year = known_year[i + 1]
-      if (is.na(after_year)){
-        print("break")
-        break
-      }
-      start_mean = disease_data$mean[before_year]
-      end_mean = disease_data$mean[after_year]
-      num_missing = after_year - before_year - 1
-      print(after_year)  
-      
-      if (num_missing > 0) {
-        increment = (end_mean - start_mean) / (num_missing + 1)
-        
-        for (j in 1:num_missing) {
-          disease_data$mean[before_year + j] = start_mean + increment * j
-        }
-      }
-    }
-    
-    expanded_data[expanded_data$Reconciled_Name == disease, ] = disease_data
-  }
-  
-  return(expanded_data)
-}
-
-best_interpolated = expand(best_subsetted)
--- a/old_folder/map_and_merge.R
+++ b/old_folder/map_and_merge.R
@ -1,19 +0,0 @@
-setwd("~/Desktop/cdsc/health literacy/")
-
-mapping = read.delim("Mapping - Sheet1.tsv")
-mapping <- mapping[,colnames(mapping) != "Notes"]
-
-IHME = read.csv("IHME_Data.csv")
-best <- read.csv("Best_Data.csv")
-
-# first, create the list of all the one-to-ones
-d <- merge(best, mapping[!is.na(mapping$Mapping_Type) & mapping$Mapping_Type == "one-to-one",], by.x="Reconciled_Name", by.y="Best_Disease_Name", all=TRUE)
-
-
-# now merge the imhe data on
-d <-  merge(d, IHME[IHME$measure_name == "DALYs (Disability-Adjusted Life Years)", 
-                  c("cause_id", "measure_name", "year", "val")],
-          by.x=c("IHME_Cause_ID", "Year"),
-          by.y=c("cause_id", "year"))
-# next, handle the one-to-manys but summing up 
-
--- a/old_folder/revised_model.png
+++ b/old_folder/revised_model.png
--- a/old_folder/syphilis_traffic.Rmd
+++ b/old_folder/syphilis_traffic.Rmd
@ -1,142 +0,0 @@
---
-title: "syphilis traffic"
-output: html_document
-date: "2024-11-19"
---
-
-```{r}
-
-library(ggplot2)
-library(dplyr)
-
-posts_per_year = syphilisall %>%
-  filter(!is.na(Year) & Year != 2021) %>%
-  group_by(Year) %>%
-  summarise(Number_of_Posts = n())
-
-ggplot(posts_per_year, aes(x = Year, y = Number_of_Posts)) +
-  geom_line() + 
-  geom_point() +  
-  labs(title = "Volume of Syphilis Traffic Over Time",
-       x = "Year",
-       y = "Number of Posts Across All of Reddit")
-
-
-```
-
-
-
-```{r}
-
-sum(is.na(syphilisall$Year))
-sum(syphilisall$Year == "")
-
-```
-
-
-
-```{r}
-
-posts_per_year = syphilisall %>%
-  filter(!is.na(Year) & Year != 2021) %>%  # Exclude NA values and 2021
-  group_by(Year, subreddit) %>%            # Group by both 'Year' and 'subreddit'
-  summarise(Number_of_Posts = n(), .groups = "drop")  # Count the number of posts per subreddit and year
-
-# Plotting the result
-ggplot(posts_per_year, aes(x = Year, y = Number_of_Posts, color = subreddit)) +
-  geom_line(aes(group = subreddit)) +   # Draw a line for each subreddit
-  geom_point() +                        # Add points for each year/subreddit combination
-  theme(legend.position="none") +
-  labs(title = "Volume of Syphilis Traffic Over Time",
-       x = "Year",
-       y = "Number of Posts per Subreddit")
-
-```
-
-```{r}
-
-unique_values = unique(syphilisall$subreddit)
-
-length(unique_values)
-
-```
-
-```{r}
-
-top_subreddits = syphilisall %>%
-  filter(!is.na(Year) & Year != 2021) %>%
-  group_by(subreddit) %>%
-  summarise(total_posts = n(), .groups = "drop") %>%
-  top_n(10, total_posts) 
-
-posts_per_year = syphilisall %>%
-  filter(subreddit %in% top_subreddits$subreddit, !is.na(Year) & Year != 2021) %>%
-  group_by(Year, subreddit) %>%
-  summarise(Number_of_Posts = n(), .groups = "drop")
-
-ggplot(posts_per_year, aes(x = Year, y = Number_of_Posts, color = subreddit)) +
-  geom_line(aes(group = subreddit)) + 
-  geom_point() +  
-  labs(title = "Volume of Syphilis Traffic Over Time",
-       x = "Year",
-       y = "Number of Posts per Subreddit")
-
-```
-
-
-```{r}
-
-sum(is.na(syphilisall$subreddit)) 
-
-sum(syphilisall$subreddit == "")
-
-
-```
-
-
-```{r}
-
-top_subreddits = syphilisall %>%
-  filter(!is.na(Year) & Year != 2021, subreddit != "") %>%
-  group_by(subreddit) %>%
-  summarise(total_posts = n(), .groups = "drop") %>%
-  top_n(10, total_posts) 
-
-posts_per_year = syphilisall %>%
-  filter(subreddit %in% top_subreddits$subreddit, !is.na(Year) & Year != 2021) %>%
-  group_by(Year, subreddit) %>%
-  summarise(Number_of_Posts = n(), .groups = "drop")
-
-ggplot(posts_per_year, aes(x = Year, y = Number_of_Posts, color = subreddit)) +
-  geom_line(aes(group = subreddit)) + 
-  geom_point() +  
-  labs(title = "Volume of Syphilis Traffic Over Time in Top Ten Subreddits",
-       x = "Year",
-       y = "Number of Posts per Subreddit")
-
-```
-
-
-```{r}
-
-top_subreddits = syphilistotal %>%
-  filter(!is.na(Year) & Year != 2021) %>%
-  group_by(subreddit) %>%
-  summarise(total_posts = n(), .groups = "drop") %>%
-  top_n(10, total_posts) 
-
-posts_per_year = syphilistotal %>%
-  filter(subreddit %in% top_subreddits$subreddit, !is.na(Year) & Year != 2021) %>%
-  group_by(Year, subreddit) %>%
-  summarise(Number_of_Posts = n(), .groups = "drop")
-
-ggplot(posts_per_year, aes(x = Year, y = Number_of_Posts, color = subreddit)) +
-  geom_line(aes(group = subreddit)) + 
-  geom_point() +  
-  labs(title = "Volume of Syphilis Traffic Over Time",
-       x = "Year",
-       y = "Number of Posts per Subreddit")
-
-```
-
-