# Load the source data.
# NOTE(review): setwd() with a user-specific path makes this script
# non-portable; it is kept because code elsewhere may rely on the working
# directory being set here.
setwd("~/Desktop/cdsc/health literacy/")
best = read.csv("Best_Data.csv")

# Keep only the desired years.
years = c("2007", "2010", "2013", "2016")
best_subsetted = best[best$Year %in% years, ]

# Linearly fill runs of NA that lie between two known values.
#
# values: a vector that may contain NA.
# Returns `values` with every NA run bounded on both sides by non-NA entries
# replaced by evenly spaced steps between the two bounding entries. Leading
# and trailing NA runs are left untouched (no extrapolation). A vector with
# fewer than two known entries is returned unchanged.
fill_interior_gaps = function(values) {
  known = which(!is.na(values))
  if (length(known) < 2) {
    return(values)
  }
  for (i in seq_len(length(known) - 1)) {
    left = known[i]
    right = known[i + 1]
    gap = right - left - 1
    if (gap > 0) {
      steps = seq_len(gap)
      increment = (values[right] - values[left]) / (gap + 1)
      values[left + steps] = values[left] + increment * steps
    }
  }
  values
}

# Create empty rows for the missing years and interpolate the mean scores.
#
# df: a data frame with at least the columns `Year`, `Reconciled_Name` and
#     `mean`.
# Returns a data frame in which every disease (each distinct non-NA value of
# `Reconciled_Name`) has one row per year from the smallest to the largest
# `Year` found anywhere in `df`, sorted by year within each disease. Rows
# added for missing years carry NA in every column except `Year`,
# `Reconciled_Name`, and `mean` where it can be interpolated: `mean` is filled
# linearly between the nearest earlier and later rows of that disease that
# have a value. Rows whose `Reconciled_Name` is NA are not carried over.
# Stops with an error if a required column is missing or no `Year` is usable.
expand = function(df) {
  required = c("Year", "Reconciled_Name", "mean")
  missing_cols = setdiff(required, names(df))
  if (length(missing_cols) > 0) {
    stop(
      "expand(): missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }
  if (nrow(df) == 0 || all(is.na(df$Year))) {
    stop("expand(): `df` has no rows with a usable Year", call. = FALSE)
  }

  # One shared year grid for all diseases, spanning the overall year range.
  year_range = range(df$Year, na.rm = TRUE)
  year_grid = data.frame(Year = seq(year_range[1], year_range[2]))

  # Compare names as character so factor and character columns behave alike.
  disease_names = as.character(df$Reconciled_Name)
  unique_diseases = unique(disease_names[!is.na(disease_names)])

  per_disease = lapply(unique_diseases, function(disease) {
    # which() drops NA comparisons instead of producing all-NA rows.
    disease_data = df[which(disease_names == disease), , drop = FALSE]
    # The left join onto the grid adds the missing years; merge() sorts by
    # Year, so row position follows year order for the gap filling below.
    expanded = merge(year_grid, disease_data, by = "Year", all.x = TRUE)
    expanded$Reconciled_Name = disease
    expanded$mean = fill_interior_gaps(expanded$mean)
    expanded
  })

  if (length(per_disease) == 0) {
    return(data.frame())
  }
  # Bind once at the end rather than growing a data frame inside a loop.
  do.call(rbind, per_disease)
}

best_interpolated = expand(best_subsetted)