adds my power analysis example

2023-11-08 09:10:54 -08:00 · 2023-11-08 09:10:54 -08:00 · f4f5fd823f
commit f4f5fd823f
parent 268ca229a2
2 changed files with 103 additions and 0 deletions
--- a/R/calculatePower.R
+++ b/R/calculatePower.R
@ -0,0 +1,49 @@
+##############################################################################
+#
+# Purpose:
+# Use pilot project data to calculate power of a full study through simulation
+#
+# Parts:
+# (0) - Setup
+# (1) - Get the pilot data and clean it
+# (2) - Run the model on the pilot data and extract effects
+# (3) - Set up and run the simulation
+# ====> Set variables at the arrows <====
+#
+##############################################################################
+# (0) - Setup
+# NOTE(review): rm(list = ls()) wipes every object in the workspace of whoever
+# runs this script; kept because the script has always started this way.
+rm(list = ls())
+set.seed(424242)  # fixed seed so the simulated draws below are repeatable
+
+# (1) - Get the pilot data and clean it
+# The sourced script is expected to leave the hand-coded pilot data in `d`.
+source("~/Research/tor_wikipedia_edits/handcoded_edits/inter_coder_reliability_ns0.R")
+# Turn the "Y" codes of not.damaging into a logical outcome column.
+# NOTE(review): to_logical() is presumably batman's, attached by the script
+# sourced above -- powerAnalysis.R only loads batman further down. Confirm.
+d$nd <- to_logical(d$not.damaging, custom_true = "Y")
+# Labels are assigned by position, so this assumes d$source already has
+# exactly these four levels in this order -- TODO confirm against the data.
+levels(d$source) <- c(
+  "IP-based Editors",
+  "New Editors",
+  "Registered Editors",
+  "Tor-based Editors"
+)
+
+# (2) - Run the model on the pilot data
+pilotM <- glm(nd ~ source, family = binomial(link = "logit"), data = d)
+summary(pilotM)  # we expect effect sizes on this order
+
+# Point estimates from the pilot fit: the intercept (b0) and the three
+# coefficients for the remaining levels of source (b1..b3). makeData() in
+# powerAnalysis.R reads these four names directly.
+pilot.b0 <- coef(summary(pilotM))[1, "Estimate"]
+pilot.b1 <- coef(summary(pilotM))[2, "Estimate"]
+pilot.b2 <- coef(summary(pilotM))[3, "Estimate"]
+pilot.b3 <- coef(summary(pilotM))[4, "Estimate"]
+
+
+# (3) - Set up and run the simulation
+
+source("powerAnalysis.R")  # my little "lib": l2p(), makeData(), powerCheck()
+
+#====>
+nSims <- 5000  # how many simulations to run
+n <- 100       # a guess for necessary sample size (per group)
+#makeData(10) #DEBUGGING CODE -- you can uncomment this if you want to see it work
+#<====
+
+print("Levels are:")
+print(levels(d$source))
+powerCheck(n, nSims)
+
+# Sample values: (per-group sample size, number of simulations)
+powerCheck(50, 100)
+powerCheck(80, 1000)
+powerCheck(200, 5000)
--- a/R/powerAnalysis.R
+++ b/R/powerAnalysis.R
@ -0,0 +1,54 @@
+# This is semi-generic code for doing a power analysis of a logistic regression with 4
+# levels in a factor
+# when there's some pilot values already available and defined
+# modelled heavily on the simulation example explained in:
+#http://meeting.spsp.org/2016/sites/default/files/Lane%2C%20Hennes%2C%20West%20SPSP%20Power%20Workshop%202016.pdf
+
+library('batman')
+library('reshape')
+
+l2p <- function(b) {
+  # Convert log-odds (logits) to probabilities.
+  #
+  # b: numeric vector of log-odds.
+  # Returns a numeric vector of probabilities, the same length as b.
+  #
+  # plogis() is the logistic distribution function, i.e. exp(b) / (1 + exp(b)),
+  # but it stays finite for large b, where the hand-written ratio becomes
+  # Inf / Inf = NaN once exp(b) overflows.
+  stats::plogis(b)
+}
+
+
+makeData <- function(n) {
+  # Simulate one dataset: n binary draws for each of four groups, returned in
+  # long format (one row per draw).
+  #
+  # n: number of draws per group.
+  # Returns a data.frame with columns 'source' (Group0..Group3) and 'nd'
+  # (the 0/1 draw).
+  #
+  # pilot.b0..pilot.b3 are not arguments; they are looked up outside the
+  # function and must have been defined before this is called.
+
+  # Log-odds per group: the intercept alone for Group0, the intercept plus the
+  # group's own coefficient for the rest.
+  #ASK: what about se in pilot data? shouldn't my probs include se?
+  logits <- list(
+    Group0 = pilot.b0,
+    Group1 = pilot.b0 + pilot.b1,
+    Group2 = pilot.b0 + pilot.b2,
+    Group3 = pilot.b0 + pilot.b3
+  )
+
+  # One column of draws per group, generated in group order.
+  draws <- lapply(logits, function(b) rbinom(n = n, size = 1, prob = l2p(b)))
+  wide <- as.data.frame(draws)
+
+  # Wide -> long. AKA the index is the unique id, as far as that goes.
+  long <- melt(wide, id.vars = 0)
+  colnames(long) <- c('source', 'nd')
+
+  long
+}
+
+powerCheck <- function(n, nSims) {
+  # Estimate power by simulation: repeatedly generate a dataset with
+  # makeData(n), fit the logistic regression nd ~ source, and record which
+  # coefficients come out significant at p <= .05.
+  #
+  # n:     sample size handed to makeData() for each simulated dataset.
+  # nSims: number of simulated datasets to generate and fit.
+  #
+  # Returns a numeric vector of length 5: the share of simulations in which
+  # coefficient 1 (the intercept), 2, 3 and 4 were significant, followed by
+  # the share in which all four were significant at once.
+
+  # One row per simulation, one column per entry of the result.
+  signif <- matrix(NA, nrow = nSims, ncol = 5)
+
+  # seq_len() rather than 1:nSims, so nSims = 0 runs no iterations instead of
+  # looping over c(1, 0).
+  for (s in seq_len(nSims)) {
+    simData <- makeData(n)       # make some data
+    m1.sim <- glm(nd ~ source,   # give the anticipated regression a try
+                  family = binomial(link = "logit"), data = simData)
+    # Summarise the fit once; column 4 of the coefficient table holds the
+    # p-values.
+    pvals <- coef(summary(m1.sim))[1:4, 4]
+    hits <- pvals <= .05
+    signif[s, ] <- c(hits, all(hits))
+  }
+
+  power <- colMeans(signif)
+  return(power)
+}
+