diff --git a/R/calculatePower.R b/R/calculatePower.R index 562a4dc..30d4c6e 100644 --- a/R/calculatePower.R +++ b/R/calculatePower.R @@ -60,8 +60,12 @@ g data2$yearsOld <- data2$age / 365 -kmodel2 <- lm(up.fac.mean ~ mmt + milestones + yearsOld, data=data2) +kmodel2 <- lm(up.fac.mean ~ mmt + milestones + age, data=data1) +kmodel4 <- lm(up.fac.mean ~ mmt + age, data=data1) +kmodel3 <- lm(up.fac.mean ~ formal.score, data=data1) summary(kmodel2) +summary(kmodel3) +summary(kmodel4) #pilotM <- glm(up.fac.mean ~ ((mmt) / (milestones/age)), # give the anticipated regression a try # family=gaussian(link='identity'), data=data1) @@ -73,7 +77,13 @@ pilot.b2 <- coef(summary(kmodel2))[3,1] pilot.b3 <- coef(summary(kmodel2))[4,1] +summary(pilot.b3) + +qqline(data1$up.fac.mean) + +sd(data1$up.fac.mean) # (3) - Set up and run the simulation +qqline(data1$mmt) source('powerAnalysis.R') #my little "lib" @@ -86,8 +96,9 @@ n <- 100 #a guess for necessary sample size (per group) #print("Levels are:") #print(levels(d$source)) powerCheck(n, nSims) +#powerCheck2(n, nSims) like doesn't really work #Sample values -powerCheck(50, 100) +powerCheck(50, 1000) powerCheck(80, 1000) powerCheck(200, 5000) diff --git a/R/powerAnalysis.R b/R/powerAnalysis.R index f4f205c..639f3e0 100644 --- a/R/powerAnalysis.R +++ b/R/powerAnalysis.R @@ -16,12 +16,29 @@ l2p <- function(b) { #Matt: makeDataNew <- function(n) { - sDF <- data.frame( + tDF <- data.frame( + Group0=rnorm(n=n, mean=-0.1296376, sd=1.479847), # up.fac.mean + Group1=rlnorm(n=n, mean=1.685715, sd = 0.2532059), # mmt + Group2=rbinom(n=n, size=1, prob=c(0.247, 0.753)), #milestones + Group3=rnorm(n=n, mean=4351.578, sd=1408.811) # age ) - colnames(sDF) <- c('formality', 'age', 'contributors', 'collaborators', 'milestones', 'mmt', 'up.fac.mean') - return(sDF) + #sDF <- melt(tDF, id.vars = 0) #AKA the index is the unique id, as far as that goes + colnames(tDF) <- c('up.fac.mean', 'mmt', 'milestones', 'age') + return(tDF) } +makeDataNew2 <- function(n) { + tDF <- data.frame( + Group0=rnorm(n=n, mean=-0.1296376, sd=1.479847), # up.fac.mean + Group1=rlnorm(n=n, mean=6.220282, sd = 2.544058) # formal.score + ) + tDF[is.na(tDF) | tDF=="Inf"] = NA + #sDF <- melt(tDF, id.vars = 0) #AKA the index is the unique id, as far as that goes + colnames(tDF) <- c('up.fac.mean', 'formal.score') + return(tDF) +} + + powerCheck <- function(n, nSims) { #run a power calculation on the dataset given #set up some empty arrays b/c R signif0 <- rep(NA, nSims) @@ -32,7 +49,8 @@ powerCheck <- function(n, nSims) { #run a power calculation on the dataset given for (s in 1:nSims) { # repeatedly we will.... simData <- makeDataNew(n) # make some data #have updated for kkex through here, now need to look at the underproduction work - m1.sim <- lm(up.fac.mean ~ ((mmt)/ (milestones/age)), data=simData) + #m1.sim <- lm(up.fac.mean ~ ((mmt)/ (milestones/age)), data=simData) + m1.sim <- lm(up.fac.mean ~ mmt + milestones + age, data=simData) p0 <- coef(summary(m1.sim))[1,4] p1 <- coef(summary(m1.sim))[1,4] p2 <- coef(summary(m1.sim))[1,4] @@ -47,3 +65,25 @@ powerCheck <- function(n, nSims) { #run a power calculation on the dataset given return(power) } +powerCheck2 <- function(n, nSims) { #run a power calculation on the dataset given + #set up some empty arrays b/c R + signif0 <- rep(NA, nSims) + signif1 <- rep(NA, nSims) + signif2 <- rep(NA, nSims) + signif3 <- rep(NA, nSims) + signifM <- rep(NA, nSims) + for (s in 1:nSims) { # repeatedly we will.... + simData <- makeDataNew2(n) # make some data + #have updated for kkex through here, now need to look at the underproduction work + #m1.sim <- lm(up.fac.mean ~ ((mmt)/ (milestones/age)), data=simData) + m1.sim <- lm(up.fac.mean ~ formal.score, data=simData) + p0 <- coef(summary(m1.sim))[1,2] + p1 <- coef(summary(m1.sim))[1,2] + signif0[s] <- p0 <=.05 + signif1[s] <- p1 <=.05 + signifM[s] <- p0 <=.05 & p1 <=.05 + } + power <- c(mean(signif0), mean(signif1), mean(signifM)) + return(power) +} +