adds my power analysis example
This commit is contained in:
parent
268ca229a2
commit
f4f5fd823f
49
R/calculatePower.R
Normal file
49
R/calculatePower.R
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
##############################################################################
|
||||||
|
#
|
||||||
|
# Purpose:
|
||||||
|
# Use pilot project data to calculate power of a full study through simulation
|
||||||
|
#
|
||||||
|
# Parts:
|
||||||
|
# (0) - Setup
|
||||||
|
# (1) - Get the pilot data and clean it
|
||||||
|
# (2) - Run the model on the pilot data and extract effects
|
||||||
|
# (3) - Set up and run the simulation
|
||||||
|
# ====> Set variables at the arrows <====
|
||||||
|
#
|
||||||
|
##############################################################################
|
||||||
|
# (0) - Setup
# NOTE: the workspace is intentionally NOT cleared with rm(list = ls()) here;
# wiping the caller's global environment is a destructive side effect. Run the
# script in a fresh R session if a clean workspace is wanted.
set.seed(424242) # fixed seed so the simulated power estimates are reproducible

# (1) - Get the pilot data and clean it
# The sourced script is relied on to create the data frame `d` used below.
source('~/Research/tor_wikipedia_edits/handcoded_edits/inter_coder_reliability_ns0.R')

# to_logical() comes from the 'batman' package. It is namespaced explicitly
# because powerAnalysis.R (which attaches batman) is only sourced further down.
d$nd <- batman::to_logical(d$not.damaging, custom_true = c("Y"))

# Relabels the existing levels of d$source by position.
# NOTE(review): assumes d$source is a factor whose four levels are already in
# this order -- confirm against the sourced script.
levels(d$source) <- c("IP-based Editors", "New Editors", "Registered Editors", "Tor-based Editors")

# (2) - Run the model on the pilot data and extract effects
pilotM <- glm(nd ~ source, family = binomial(link = "logit"), data = d)
print(summary(pilotM)) # we expect effect sizes on this order

# Column 1 of the coefficient table holds the estimates (logit scale); row 1 is
# the intercept and rows 2-4 are the remaining levels of `source`.
# These four globals are the defaults read by makeData() in powerAnalysis.R.
pilotEstimates <- coef(summary(pilotM))
pilot.b0 <- pilotEstimates[1, 1]
pilot.b1 <- pilotEstimates[2, 1]
pilot.b2 <- pilotEstimates[3, 1]
pilot.b3 <- pilotEstimates[4, 1]

# (3) - Set up and run the simulation
source('powerAnalysis.R') # my little "lib"; path is relative to the working directory

#====>
nSims <- 5000 # how many simulations to run
n <- 100 # a guess for necessary sample size (per group)
#makeData(10) #DEBUGGING CODE -- you can uncomment this if you want to see it work
#<====

print("Levels are:")
print(levels(d$source))

# Results are printed explicitly so they also show up when this file is run
# via source(), where bare top-level expressions are not auto-printed.
print(powerCheck(n, nSims))

# Sample values
print(powerCheck(50, 100))
print(powerCheck(80, 1000))
print(powerCheck(200, 5000))
|
54
R/powerAnalysis.R
Normal file
54
R/powerAnalysis.R
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
# This is semi-generic code for doing a power analysis of a logistic regression with 4
|
||||||
|
# levels in a factor
|
||||||
|
# when there's some pilot values already available and defined
|
||||||
|
# modelled heavily on the simulation example explained in:
|
||||||
|
#http://meeting.spsp.org/2016/sites/default/files/Lane%2C%20Hennes%2C%20West%20SPSP%20Power%20Workshop%202016.pdf
|
||||||
|
|
||||||
|
library('batman')
|
||||||
|
library('reshape')
|
||||||
|
|
||||||
|
# Convert log-odds (logit scale) to probabilities.
#
# Args:
#   b: numeric vector of log-odds values.
# Returns:
#   Numeric vector of probabilities, the same length as b.
#
# Uses stats::plogis() instead of exp(b) / (1 + exp(b)): the hand-written form
# overflows for large b (exp(b) becomes Inf and Inf / Inf is NaN), whereas
# plogis() saturates cleanly at 0 and 1.
l2p <- function(b) {
  prob <- stats::plogis(b)
  return(prob)
}
|
||||||
|
|
||||||
|
|
||||||
|
# Simulate one long-format dataset: four groups with n binary outcomes each.
#
# Args:
#   n:     number of observations to draw per group (a single number >= 1).
#   coefs: numeric vector of four logit-scale coefficients: the intercept
#          (log-odds for Group0) followed by the offsets for Group1..Group3.
#          Defaults to the pilot estimates pilot.b0..pilot.b3, which must be
#          defined (e.g. globally) before makeData() is called without coefs.
# Returns:
#   A data frame with one row per simulated observation and two columns:
#   'source' (the group label, Group0..Group3) and 'nd' (the 0/1 draw).
#
# NOTE: the draws use the pilot point estimates only; the standard errors of
# the pilot coefficients are not propagated into the probabilities. (Open
# question carried over from the original author: should they be?)
makeData <- function(n, coefs = c(pilot.b0, pilot.b1, pilot.b2, pilot.b3)) {
  stopifnot(is.numeric(n), length(n) == 1, !is.na(n), n >= 1)
  stopifnot(is.numeric(coefs), length(coefs) == 4, !anyNA(coefs))

  # Success probability per group: intercept alone for Group0, intercept plus
  # the group's own offset for Group1..Group3.
  probs <- l2p(coefs[1] + c(0, coefs[2:4]))

  # One column per group. The groups are drawn in the order Group0..Group3 so
  # the random number stream is consumed in the same order as before.
  tDF <- data.frame(
    Group0 = rbinom(n = n, size = 1, prob = probs[1]),
    Group1 = rbinom(n = n, size = 1, prob = probs[2]),
    Group2 = rbinom(n = n, size = 1, prob = probs[3]),
    Group3 = rbinom(n = n, size = 1, prob = probs[4])
  )

  # Stack the four columns into long format. id.vars = 0 selects no id
  # columns, so every column is treated as a measured variable (AKA the row
  # index is the only unique id, as far as that goes).
  sDF <- melt(tDF, id.vars = 0)
  colnames(sDF) <- c('source', 'nd')

  return(sDF)
}
|
||||||
|
|
||||||
|
# Estimate power by simulation for the 4-level logistic regression nd ~ source.
#
# Each simulation draws a fresh dataset with makeData(n), fits the logistic
# regression, and records which of the four coefficients (intercept plus the
# three group contrasts) have a p-value at or below alpha.
#
# Args:
#   n:     sample size per group, passed straight to makeData().
#   nSims: number of simulated datasets to generate (a single number >= 1).
#   alpha: significance threshold applied to each coefficient's p-value
#          (default 0.05, the value that was previously hard-coded).
# Returns:
#   Unnamed numeric vector of length 5: the proportion of simulations in which
#   coefficient 1, 2, 3 and 4 respectively was significant, followed by the
#   proportion in which all four were significant at once.
powerCheck <- function(n, nSims, alpha = 0.05) {
  # Guard against nSims == 0: the previous 1:nSims loop would have iterated
  # over c(1, 0) and silently run one simulation anyway.
  stopifnot(is.numeric(nSims), length(nSims) == 1, !is.na(nSims), nSims >= 1)
  stopifnot(is.numeric(alpha), length(alpha) == 1, !is.na(alpha))

  # One row per simulation, one column per model coefficient.
  pvals <- matrix(NA_real_, nrow = nSims, ncol = 4)

  for (s in seq_len(nSims)) { # repeatedly we will....
    simData <- makeData(n) # make some data
    m1.sim <- glm(nd ~ source, # give the anticipated regression a try
                  family = binomial(link = "logit"), data = simData)
    # Column 4 of the coefficient table holds the p-values; the table is
    # extracted once per fit instead of once per coefficient.
    pvals[s, ] <- coef(summary(m1.sim))[1:4, 4]
  }

  signif <- pvals <= alpha
  # Joint significance: all four coefficients significant in the same run.
  signifM <- signif[, 1] & signif[, 2] & signif[, 3] & signif[, 4]

  power <- c(mean(signif[, 1]), mean(signif[, 2]), mean(signif[, 3]),
             mean(signif[, 4]), mean(signifM))
  return(power)
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user