From 1a5b69851290a655221ba5d73ec34d4eea1d1f88 Mon Sep 17 00:00:00 2001
From: Kaylea Champion
Date: Fri, 10 Nov 2023 11:22:58 -0800
Subject: [PATCH] demos some changes

---
Review notes (this area is ignored when the patch is applied):
 * The glm() call that defines pilotM is kept as unchanged context instead
   of being commented out, because summary(pilotM) and
   coef(summary(pilotM)) directly below still read it and the script
   starts with rm(list=ls()).
 * The age-restricted subset is stored as data1.young so that data2 (the
   package results read from CSV) is not overwritten.
 * NOTE(review): blank context lines and the indentation of the
   "family=" continuation line were reconstructed to satisfy the hunk
   line counts; confirm against R/calculatePower.R at 07f0c80, or apply
   with `git apply --ignore-whitespace`. The post-image hash on the
   "index" line predates these adjustments.

 R/calculatePower.R | 41 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)

diff --git a/R/calculatePower.R b/R/calculatePower.R
index 07f0c80..6429216 100644
--- a/R/calculatePower.R
+++ b/R/calculatePower.R
@@ -15,20 +15,59 @@
 rm(list=ls())
 set.seed(424242)
 library(readr)
+library(ggplot2)
 
 # (1) - Get the pilot data and clean it
 #source('~/Research/tor_wikipedia_edits/handcoded_edits/inter_coder_reliability_ns0.R')
 #source ('/data/users/mgaughan/kkex_data_110823_3')
 data1 <- read_csv('../power_data_110923_mmt.csv',show_col_types = FALSE)
-data2 <- read_csv('/data_ext/users/kcz1100/kaylea_dissertation/collabnetXS/raw_data/inst_all_packages_full_results.csv')
+data2 <- read_csv('../inst_all_packages_full_results.csv')
 #d$nd <- to_logical(d$not.damaging, custom_true=c("Y"))
 #levels(d$source) <- c("IP-based Editors", "New Editors", "Registered Editors", "Tor-based Editors")
 data1$up.fac.mean <- as.numeric(data2$up.fac.mean[match(data1$pkg, data2$pkg)])
 data1$milestones <- as.numeric(data1$milestones > 0) + 1
 
 # (2) - Run the model on the pilot data
+# Exploratory look at the pilot data before the pilot model is fitted.
+# formal.score is computed with ordinary arithmetic division; inside a model
+# formula `/` is the nesting operator instead (a / b expands to a + a:b).
+data1$formal.score <- data1$mmt / (data1$milestones/data1$age)
+table(data1$milestones)
+hist(data1$mmt) #inequality of participation
+hist(data1$formal.score)
+hist(data1$age/365)
+kmodel1 <- lm(up.fac.mean ~ mmt, data=data1)
+summary(kmodel1)
+kmodel1 <- lm(up.fac.mean ~ formal.score, data=data1)
+summary(kmodel1)
+hist(data1$formal.score)
+cor.test(data1$formal.score, data1$up.fac.mean)
+cor.test(data1$mmt, data1$up.fac.mean)
+cor.test(data1$milestones, data1$up.fac.mean)
+cor.test(data1$age, data1$up.fac.mean)
+
+g <- ggplot(data1, aes(x=formal.score, y=up.fac.mean)) +
+  geom_point() +
+  geom_smooth()
+g
+
+# Rows with age/365 < 9, kept under their own name so that data2 (the
+# package results read above) is not overwritten.
+data1.young <- subset(data1, (data1$age / 365) < 9 )
+hist(data1.young$age)
+g <- ggplot(data1.young, aes(x=formal.score, y=up.fac.mean)) +
+  geom_point() +
+  geom_smooth()
+g
+
+data1.young$yearsOld <- data1.young$age / 365
+
+kmodel2 <- lm(up.fac.mean ~ mmt + milestones + yearsOld, data=data1.young)
+summary(kmodel2)
+
+# pilotM must stay defined: the summary() and coef() calls below read it.
 pilotM <- glm(up.fac.mean ~ ((mmt) / (milestones/age)), # give the anticipated regression a try
               family=gaussian(link='identity'), data=data1)
 summary(pilotM) #we expect effect sizes on this order
 
 pilot.b0 <- coef(summary(pilotM))[1,1]