From 48d1be7bf89d404f51e5cf7d7b5bb62db9adcd4c Mon Sep 17 00:00:00 2001 From: mjgaughan Date: Mon, 13 Nov 2023 22:30:40 -0600 Subject: [PATCH 1/2] updates to analysis --- R/calculatePower.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/calculatePower.R b/R/calculatePower.R index 29d3487..562a4dc 100644 --- a/R/calculatePower.R +++ b/R/calculatePower.R @@ -27,7 +27,7 @@ data2 <- read_csv('../inst_all_packages_full_results.csv') python_labeled <- as.numeric(data2$up.fac.mean[match(paste('python',tolower(data1$pkg), sep = "-"), data2$pkg)]) same_labeled <- as.numeric(data2$up.fac.mean[match(tolower(data1$pkg), data2$pkg)]) data1$up.fac.mean <- pmin(python_labeled, same_labeled, na.rm=TRUE) -data1$milestones <- as.numeric(data1$milestones > 0) +data1$milestones <- as.numeric(data1$milestones > 0) + 1 # (2) - Run the model on the pilot data data1$formal.score <- data1$mmt / (data1$milestones/data1$age) table(data1$milestones) @@ -46,7 +46,7 @@ cor.test(data1$mmt, data1$up.fac.mean) cor.test(data1$milestones, data1$up.fac.mean) cor.test(data1$age, data1$up.fac.mean) -g <- ggplot(data1, aes(x=formal.score, y=up.fac.mean)) + +g <- ggplot(data1, aes(x=mmt, y=up.fac.mean)) + geom_point() + geom_smooth() g @@ -67,10 +67,10 @@ summary(kmodel2) # family=gaussian(link='identity'), data=data1) summary(pilotM) #we expect effect sizes on this order -pilot.b0 <- coef(summary(pilotM))[1,1] -pilot.b1 <- coef(summary(pilotM))[2,1] -pilot.b2 <- coef(summary(pilotM))[3,1] -pilot.b3 <- coef(summary(pilotM))[4,1] +pilot.b0 <- coef(summary(kmodel2))[1,1] +pilot.b1 <- coef(summary(kmodel2))[2,1] +pilot.b2 <- coef(summary(kmodel2))[3,1] +pilot.b3 <- coef(summary(kmodel2))[4,1] # (3) - Set up and run the simulation From 48d4ea2851d07f951171c129d761b3091a7d0141 Mon Sep 17 00:00:00 2001 From: mjgaughan Date: Tue, 14 Nov 2023 09:01:02 -0600 Subject: [PATCH 2/2] faulty power analysis --- R/powerAnalysis.R | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/R/powerAnalysis.R b/R/powerAnalysis.R index 3c7d440..f4f205c 100644 --- a/R/powerAnalysis.R +++ b/R/powerAnalysis.R @@ -17,14 +17,8 @@ l2p <- function(b) { #Matt: makeDataNew <- function(n) { sDF <- data.frame( - sample(2793.638:3066.417, 1, replace = FALSE), - sample(27.6519:154.6866, 1, replace = FALSE), - sample(50.01884:96.77090, 1, replace = FALSE), - sample(c(1,2), 1, prob=(c(0.5,0.5)), replace = FALSE), - sample(1.610638: 1.684438, 1, replace = FALSE), - sample(-0.1961401:-0.1647757, 1, replace = FALSE) ) - colnames(sDF) <- c('age', 'contributors', 'collaborators', 'milestones', 'mmt', 'up.fac.mean') + colnames(sDF) <- c('formality', 'age', 'contributors', 'collaborators', 'milestones', 'mmt', 'up.fac.mean') return(sDF) } @@ -38,8 +32,7 @@ powerCheck <- function(n, nSims) { #run a power calculation on the dataset given for (s in 1:nSims) { # repeatedly we will.... simData <- makeDataNew(n) # make some data #have updated for kkex through here, now need to look at the underproduction work - m1.sim <- glm(up.fac.mean ~ ((mmt)/ (milestones/age)), # give the anticipated regression a try - family=gaussian(link='identity'), data=simData) + m1.sim <- lm(up.fac.mean ~ ((mmt)/ (milestones/age)), data=simData) p0 <- coef(summary(m1.sim))[1,4] p1 <- coef(summary(m1.sim))[1,4] p2 <- coef(summary(m1.sim))[1,4]