# diff --git a/dataset/interpolation_function.R b/dataset/interpolation_function.R
# new file mode 100644 (index 0000000..53f8eaf)

# NOTE(review): machine-specific working directory; "Best_Data.csv" below is
# resolved relative to it.
setwd("~/Desktop/cdsc/health literacy/")

best <- read.csv("Best_Data.csv")

# Subset to the desired years.
years <- c("2007", "2010", "2013", "2016")
best_subsetted <- best[best$Year %in% years, ]

#' Expand each disease to one row per year and interpolate `mean`
#'
#' For every distinct `Reconciled_Name` in `df`, builds one row for each year
#' between the smallest and largest `Year` found anywhere in `df`, then fills
#' missing `mean` values by linear interpolation between the nearest known
#' values on either side. Years before a disease's first known value or after
#' its last known value are left as `NA` (no extrapolation). All other columns
#' are `NA` on the rows that were added.
#'
#' @param df A data frame with columns `Year` (numeric), `Reconciled_Name`
#'   and `mean` (numeric).
#' @return A data frame with `Year` as its first column, containing the
#'   per-disease expansions stacked in order of first appearance of each
#'   `Reconciled_Name` in `df`. A zero-row `df` is returned unchanged.
expand <- function(df) {
  required <- c("Year", "Reconciled_Name", "mean")
  absent <- setdiff(required, names(df))
  if (length(absent) > 0) {
    stop(
      "`df` is missing required column(s): ",
      paste(absent, collapse = ", "),
      call. = FALSE
    )
  }
  if (nrow(df) == 0L) {
    return(df)
  }

  # The year grid is shared by all diseases: global min to global max.
  all_years <- data.frame(Year = seq(min(df$Year), max(df$Year)))
  # as.character() so a factor column yields plain strings to iterate over.
  diseases <- as.character(unique(df$Reconciled_Name))

  expand_one <- function(disease) {
    # %in% (unlike ==) never yields NA, so rows with a missing name cannot
    # leak into another disease's subset.
    rows <- df[df$Reconciled_Name %in% disease, , drop = FALSE]
    # Left join onto the full grid; merge() sorts the result by Year.
    out <- merge(all_years, rows, by = "Year", all.x = TRUE)
    out$Reconciled_Name <- disease

    known <- !is.na(out$mean)
    gaps <- !known
    # approx() needs at least two distinct x values; with fewer there is
    # nothing to interpolate between, so the NAs are left in place.
    if (any(gaps) && length(unique(out$Year[known])) >= 2L) {
      # Interpolate on Year itself rather than on row position, so the result
      # does not depend on there being exactly one row per year.
      out$mean[gaps] <- stats::approx(
        x = out$Year[known],
        y = out$mean[known],
        xout = out$Year[gaps]
      )$y
    }
    out
  }

  # Build all pieces first and bind once instead of growing with rbind().
  do.call(rbind, lapply(diseases, expand_one))
}

best_interpolated <- expand(best_subsetted)

# diff --git a/dataset/map_and_merge.R b/dataset/map_and_merge.R
# new file mode 100644 (index 0000000..05d0f9a)
# NOTE(review): machine-specific working directory; every input path below is
# resolved relative to it.
setwd("~/Desktop/cdsc/health literacy/")

# Load the mapping sheet and discard its "Notes" column.
mapping <- read.delim("Mapping - Sheet1.tsv")
mapping <- mapping[, names(mapping) != "Notes"]

IHME <- read.csv("IHME_Data.csv")
best <- read.csv("Best_Data.csv")

# Step 1: full outer join of `best` with the one-to-one mapping rows.
# which() drops both the NA and the non-matching Mapping_Type entries.
d <- merge(
  x = best,
  y = mapping[which(mapping$Mapping_Type == "one-to-one"), ],
  by.x = "Reconciled_Name",
  by.y = "Best_Disease_Name",
  all = TRUE
)

# Step 2: join the IHME DALY rows on (cause id, year). With no `all` argument
# this keeps only rows that have a match on both sides.
d <- merge(
  x = d,
  y = IHME[
    IHME$measure_name == "DALYs (Disability-Adjusted Life Years)",
    c("cause_id", "measure_name", "year", "val")
  ],
  by.x = c("IHME_Cause_ID", "Year"),
  by.y = c("cause_id", "year")
)

# TODO: next, handle the one-to-many mappings by summing up.