# Semi-generic code for a simulation-based power analysis, for use when some
# pilot values are already available and can be plugged in as effect sizes.
# (Originally described as a power analysis of a logistic regression with
# 4 levels in a factor; note that the models fitted below are linear, lm().)
# Modelled heavily on the simulation example explained in:
# http://meeting.spsp.org/2016/sites/default/files/Lane%2C%20Hennes%2C%20West%20SPSP%20Power%20Workshop%202016.pdf

library('batman')
library('reshape')

# Convert a logit (log-odds) value to a probability.
#
# b: numeric vector of log-odds.
# Returns a numeric vector of probabilities, same length as `b`.
l2p <- function(b) {
  # plogis() is the inverse logit, odds / (1 + odds) with odds = exp(b), but
  # it stays finite for large `b`, where exp(b) overflows and the hand-written
  # ratio evaluates to Inf / Inf = NaN.
  stats::plogis(b)
}

#Matt:
# Simulate the predictors for the first model. The outcome is NOT simulated
# here; powerCheck() builds it from these columns.
#
# n: number of rows (observations) to simulate.
# Returns a data.frame with columns `new_mmt`, `milestones` and `age`.
makeDataNew <- function(n) {
  tDF <- data.frame(
    ## don't sim the outcome
    #up.fac.mean=rnorm(n=n, mean=-0.1296376, sd=1.479847), # up.fac.mean
    #mmt=rlnorm(n=n, mean=1.685715, sd = 0.2532059), # mmt
    new_mmt = rbeta(n = n, 5, 1),
    #mmt=rlogis(n=n, location = 1.685715),
    ## this generates a 50-50 split of milestones --v
    #milestones=rbinom(n=n, size=1, prob=c(0.247, 0.753)), #milestones
    milestones = rbinom(n = n, size = 1, prob = .247), # milestones
    age = rnorm(n = n, mean = 4351.578, sd = 1408.811) # age
  )
  #sDF <- melt(tDF, id.vars = 0) #AKA the index is the unique id, as far as that goes
  ## can name these in the data.frame constructor method directly
  #colnames(tDF) <- c('up.fac.mean', 'mmt', 'milestones', 'age')
  tDF
}

# Simulate the predictor for the second model. The outcome is NOT simulated
# here; powerCheck2() builds it from this column.
#
# n: number of rows (observations) to simulate.
# Returns a data.frame with the single column `formal.score`.
makeDataNew2 <- function(n) {
  ## don't sim the outcome
  #up.fac.mean=rnorm(n=n, mean=-0.1296376, sd=1.479847), # up.fac.mean
  #formal.score=rlnorm(n=n, mean=6.220282, sd = 2.544058) # formal.score
  formalScore <- rbeta(n = n, 1, 3) * 10000
  # Blank out anything non-finite. is.finite() is FALSE for NA, NaN, Inf and
  # -Inf, whereas the previous comparison against the string "Inf" relied on
  # numeric-to-character coercion and missed -Inf.
  formalScore[!is.finite(formalScore)] <- NA
  tDF <- data.frame(formal.score = formalScore)
  #sDF <- melt(tDF, id.vars = 0) #AKA the index is the unique id, as far as that goes
  ##colnames(tDF) <- c('up.fac.mean', 'formal.score')
  tDF
}

# Run a simulated power calculation for up.fac.mean ~ new_mmt + milestones.
#
# n:     number of observations per simulated dataset.
# nSims: number of simulated datasets to draw and fit.
# Returns a numeric vector of length 5 holding the proportion of simulations
# with p <= .05 for, in order: the intercept, new_mmt, milestones, age, and
# all tested terms jointly. Age is currently left out of the model, so the
# fourth element is always NA; it is kept so the shape of the result does not
# change for existing callers.
powerCheck <- function(n, nSims) {
  #set up some empty arrays b/c R
  signif0 <- rep(NA, nSims)
  signif1 <- rep(NA, nSims)
  signif2 <- rep(NA, nSims)
  signif3 <- rep(NA, nSims) # never filled while age is out of the model
  signifM <- rep(NA, nSims)

  # seq_len() rather than 1:nSims, which would iterate over c(1, 0) and fit a
  # spurious simulation when nSims is 0.
  for (s in seq_len(nSims)) { # repeatedly we will....
    simData <- makeDataNew(n) # make some data

    ## outcome goes here --v
    # e.g. simData$up.fac.mean <- (usefuleffsizeA * mmt) + (usefuleffsizeB * milestones) + rnorm(n=1, mean=0, sd=1) ##plus some noise
    #simData$up.fac.mean <- (-2.075 * simData$mmt) + (0.4284 * simData$milestones) + rnorm(n=1, mean=0, sd=1)
    simData$up.fac.mean <- (-1.38 * simData$new_mmt) +
      (0.40 * simData$milestones) +
      rnorm(n = n, mean = 0, sd = 1)

    #have updated for kkex through here, now need to look at the underproduction work
    #m1.sim <- lm(up.fac.mean ~ ((mmt)/ (milestones/age)), data=simData)
    ## could leave age out for now?
    #m1.sim <- lm(up.fac.mean ~ mmt + milestones + age, data=simData)
    m1.sim <- lm(up.fac.mean ~ new_mmt + milestones, data = simData)

    # Summarise the fit once; column 4 of the coefficient table is Pr(>|t|).
    coefTable <- coef(summary(m1.sim))
    p0 <- coefTable[1, 4] #intercept
    p1 <- coefTable[2, 4] #mmt
    p2 <- coefTable[3, 4] #milestones
    #p3 <- coefTable[4, 4] #age

    signif0[s] <- p0 <= .05
    signif1[s] <- p1 <= .05
    signif2[s] <- p2 <= .05
    #signif3[s] <- p3 <= .05
    signifM[s] <- p0 <= .05 & p1 <= .05 & p2 <= .05 #& p3 <= .05
  }

  power <- c(mean(signif0), mean(signif1), mean(signif2), mean(signif3),
             mean(signifM))
  power
}

# Run a simulated power calculation for up.fac.mean ~ formal.score.
#
# n:     number of observations per simulated dataset.
# nSims: number of simulated datasets to draw and fit.
# Returns a numeric vector of length 3 holding the proportion of simulations
# with p <= .05 for, in order: the intercept, formal.score, and both jointly.
powerCheck2 <- function(n, nSims) {
  #set up some empty arrays b/c R
  signif0 <- rep(NA, nSims)
  signif1 <- rep(NA, nSims)
  signifM <- rep(NA, nSims)

  # seq_len() rather than 1:nSims, which would iterate over c(1, 0) and fit a
  # spurious simulation when nSims is 0.
  for (s in seq_len(nSims)) { # repeatedly we will....
    simData <- makeDataNew2(n) # make some data

    #have updated for kkex through here, now need to look at the underproduction work
    #m1.sim <- lm(up.fac.mean ~ ((mmt)/ (milestones/age)), data=simData)
    ## outcome goes here --v
    simData$up.fac.mean <- (0.00017 * simData$formal.score) +
      rnorm(n, mean = 0, sd = 1) ##plus some noise

    m1.sim <- lm(up.fac.mean ~ formal.score, data = simData)

    # Summarise the fit once; column 4 of the coefficient table is Pr(>|t|).
    coefTable <- coef(summary(m1.sim))
    p0 <- coefTable[1, 4] #intercept
    p1 <- coefTable[2, 4] #formal.score

    signif0[s] <- p0 <= .05
    signif1[s] <- p1 <= .05
    signifM[s] <- p0 <= .05 & p1 <= .05
  }

  power <- c(mean(signif0), mean(signif1), mean(signifM))
  power
}