# Source listing metadata: 59 lines, 1.7 KiB, R
# Work from the project folder so the relative CSV path below resolves.
setwd("~/Desktop/cdsc/health literacy/")

# Load the full data set.
best <- read.csv("Best_Data.csv")

# Keep only the rows whose Year is one of the desired years.
years <- c("2007", "2010", "2013", "2016")
in_desired_years <- is.element(best$Year, years)
best_subsetted <- best[in_desired_years, ]
|
|
|
|
# creating empty rows for interpolation + interpolating the mean (stigma) scores
|
|
|
|
#' Expand each disease to a full yearly series and interpolate `mean`
#'
#' For every distinct `Reconciled_Name` in `df`, one row is produced for each
#' year between the smallest and largest `Year` found anywhere in `df`. Years
#' that were not observed for a disease get `NA` in every column except
#' `Year` and `Reconciled_Name`. Missing `mean` values that lie between two
#' observed years are then filled by linear interpolation over `Year`; values
#' before the first or after the last observation of a disease stay `NA`
#' (no extrapolation), as do diseases with fewer than two observed years.
#'
#' @param df A data frame with columns `Year` (numeric, no `NA`),
#'   `Reconciled_Name` and `mean` (numeric). Other columns are carried along
#'   unchanged and are not interpolated.
#' @return A data frame with the columns of `df` (`Year` first), diseases
#'   stacked in order of first appearance and rows ordered by year within each
#'   disease. A zero-row `df` is returned unchanged.
expand <- function(df) {
  required_cols <- c("Year", "Reconciled_Name", "mean")
  missing_cols <- setdiff(required_cols, names(df))
  if (length(missing_cols) > 0) {
    stop(
      "`df` is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }
  if (nrow(df) == 0) {
    # Nothing to expand; min()/max() on an empty column would not be finite.
    return(df)
  }
  if (!is.numeric(df$Year) || anyNA(df$Year)) {
    stop("`df$Year` must be numeric and contain no NA values", call. = FALSE)
  }

  # One row per year across the whole observed range, shared by all diseases.
  year_grid <- data.frame(Year = seq(min(df$Year), max(df$Year)))

  # Linearly interpolate the NA entries of `values` over `years`. Only NA
  # entries are written, so observed values are never altered.
  fill_gaps <- function(years, values) {
    known <- !is.na(values)
    gaps <- !known
    # Interpolation needs two distinct anchor years; otherwise leave as is.
    if (!any(gaps) || length(unique(years[known])) < 2) {
      return(values)
    }
    # `ties = mean` averages duplicate observations of one year and, being
    # given explicitly, avoids approx()'s "collapsing" warning.
    values[gaps] <- stats::approx(
      x = years[known],
      y = values[known],
      xout = years[gaps],
      ties = mean
    )$y
    values
  }

  diseases <- unique(df$Reconciled_Name)
  if (is.factor(diseases)) {
    # Iterate over labels rather than one-element factors.
    diseases <- as.character(diseases)
  }

  per_disease <- lapply(diseases, function(disease) {
    # `%in%` (unlike `==`) never yields NA, so rows with a missing name form
    # their own group instead of producing all-NA rows.
    observed <- df[df$Reconciled_Name %in% disease, , drop = FALSE]
    # Left join onto the year grid; merge() returns rows sorted by Year.
    expanded <- merge(year_grid, observed, by = "Year", all.x = TRUE)
    expanded$Reconciled_Name <- disease
    expanded$mean <- fill_gaps(expanded$Year, expanded$mean)
    expanded
  })

  # Bind once at the end instead of growing a data frame inside a loop.
  do.call(rbind, per_disease)
}
|
|
|
|
# Build the full yearly series (with interpolated means) for the kept years.
best_interpolated <- expand(df = best_subsetted)
|