merge in your latest

Merge branch 'master' of code.communitydata.science:kkex_repo
This commit is contained in:
Kaylea Champion 2023-11-14 08:40:33 -08:00
commit 147bfb7bec
2 changed files with 8 additions and 15 deletions

View File

@@ -27,7 +27,7 @@ data2 <- read_csv('../inst_all_packages_full_results.csv')
python_labeled <- as.numeric(data2$up.fac.mean[match(paste('python',tolower(data1$pkg), sep = "-"), data2$pkg)]) python_labeled <- as.numeric(data2$up.fac.mean[match(paste('python',tolower(data1$pkg), sep = "-"), data2$pkg)])
same_labeled <- as.numeric(data2$up.fac.mean[match(tolower(data1$pkg), data2$pkg)]) same_labeled <- as.numeric(data2$up.fac.mean[match(tolower(data1$pkg), data2$pkg)])
data1$up.fac.mean <- pmin(python_labeled, same_labeled, na.rm=TRUE) data1$up.fac.mean <- pmin(python_labeled, same_labeled, na.rm=TRUE)
data1$milestones <- as.numeric(data1$milestones > 0) data1$milestones <- as.numeric(data1$milestones > 0) + 1
# (2) - Run the model on the pilot data # (2) - Run the model on the pilot data
data1$formal.score <- data1$mmt / (data1$milestones/data1$age) data1$formal.score <- data1$mmt / (data1$milestones/data1$age)
table(data1$milestones) table(data1$milestones)
@@ -46,7 +46,7 @@ cor.test(data1$mmt, data1$up.fac.mean)
cor.test(data1$milestones, data1$up.fac.mean) cor.test(data1$milestones, data1$up.fac.mean)
cor.test(data1$age, data1$up.fac.mean) cor.test(data1$age, data1$up.fac.mean)
g <- ggplot(data1, aes(x=formal.score, y=up.fac.mean)) + g <- ggplot(data1, aes(x=mmt, y=up.fac.mean)) +
geom_point() + geom_point() +
geom_smooth() geom_smooth()
g g
@@ -67,10 +67,10 @@ summary(kmodel2)
# family=gaussian(link='identity'), data=data1) # family=gaussian(link='identity'), data=data1)
summary(pilotM) #we expect effect sizes on this order summary(pilotM) #we expect effect sizes on this order
pilot.b0 <- coef(summary(pilotM))[1,1] pilot.b0 <- coef(summary(kmodel2))[1,1]
pilot.b1 <- coef(summary(pilotM))[2,1] pilot.b1 <- coef(summary(kmodel2))[2,1]
pilot.b2 <- coef(summary(pilotM))[3,1] pilot.b2 <- coef(summary(kmodel2))[3,1]
pilot.b3 <- coef(summary(pilotM))[4,1] pilot.b3 <- coef(summary(kmodel2))[4,1]
# (3) - Set up and run the simulation # (3) - Set up and run the simulation

View File

@@ -17,14 +17,8 @@ l2p <- function(b) {
#Matt: #Matt:
makeDataNew <- function(n) { makeDataNew <- function(n) {
sDF <- data.frame( sDF <- data.frame(
sample(2793.638:3066.417, 1, replace = FALSE),
sample(27.6519:154.6866, 1, replace = FALSE),
sample(50.01884:96.77090, 1, replace = FALSE),
sample(c(1,2), 1, prob=(c(0.5,0.5)), replace = FALSE),
sample(1.610638: 1.684438, 1, replace = FALSE),
sample(-0.1961401:-0.1647757, 1, replace = FALSE)
) )
colnames(sDF) <- c('age', 'contributors', 'collaborators', 'milestones', 'mmt', 'up.fac.mean') colnames(sDF) <- c('formality', 'age', 'contributors', 'collaborators', 'milestones', 'mmt', 'up.fac.mean')
return(sDF) return(sDF)
} }
@@ -38,8 +32,7 @@ powerCheck <- function(n, nSims) { #run a power calculation on the dataset given
for (s in 1:nSims) { # repeatedly we will.... for (s in 1:nSims) { # repeatedly we will....
simData <- makeDataNew(n) # make some data simData <- makeDataNew(n) # make some data
#have updated for kkex through here, now need to look at the underproduction work #have updated for kkex through here, now need to look at the underproduction work
m1.sim <- glm(up.fac.mean ~ ((mmt)/ (milestones/age)), # give the anticipated regression a try m1.sim <- lm(up.fac.mean ~ ((mmt)/ (milestones/age)), data=simData)
family=gaussian(link='identity'), data=simData)
p0 <- coef(summary(m1.sim))[1,4] p0 <- coef(summary(m1.sim))[1,4]
p1 <- coef(summary(m1.sim))[1,4] p1 <- coef(summary(m1.sim))[1,4]
p2 <- coef(summary(m1.sim))[1,4] p2 <- coef(summary(m1.sim))[1,4]