From a2fe06a31c2bfc8e518d77f6d32f58fc6fd25dbb Mon Sep 17 00:00:00 2001
From: Kaylea Champion
Date: Thu, 9 Nov 2023 10:10:40 -0800
Subject: [PATCH] adds this to git

'EDA' is Exploratory Data Analysis
---
 R/EDA.R | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 R/EDA.R

diff --git a/R/EDA.R b/R/EDA.R
new file mode 100644
index 0000000..67fd2ce
--- /dev/null
+++ b/R/EDA.R
@@ -0,0 +1,42 @@
+# Exploratory data analysis (EDA) of the project's power_data CSV.
+#
+# Reads the CSV, plots the distribution of each variable of interest,
+# derives a milestone-usage indicator, and runs quick correlation checks.
+
+library(dplyr)
+
+# Data location: set the POWER_DATA_CSV environment variable to point at a
+# local copy; otherwise fall back to the original author's path.
+data_path <- Sys.getenv(
+  "POWER_DATA_CSV",
+  unset = "~/Research/kkex_repo/power_data_110923.csv"
+)
+df <- read.csv(data_path)
+
+# Distributions ----
+# NOTE(review): age looks like it is stored in days (divided by 365 to get
+# years) -- confirm against the data dictionary.
+hist(df$age / 365) # there's a big bump at 9 years, why?
+hist(df$contributors) # skewed
+# log1p() rather than log(): log(0) is -Inf, and hist() silently drops
+# non-finite values, which would hide every zero count from the plot.
+hist(log1p(df$contributors)) # better
+hist(df$collaborators) # skewed
+hist(log1p(df$collaborators)) # better
+hist(df$milestones)
+
+# Milestone usage ----
+# One of many ways to generate a dichotomous variable. Missing milestone
+# counts stay NA instead of being silently coded as FALSE.
+df$uses_milestones <- case_when(
+  is.na(df$milestones) ~ NA,
+  df$milestones != 0 ~ TRUE,
+  .default = FALSE
+)
+
+table(df$uses_milestones, useNA = "ifany")
+
+# Playing around ----
+# cor.test() needs numeric input, so the logical indicator is coerced to 0/1.
+cor.test(df$contributors, as.numeric(df$uses_milestones))
+cor.test(df$collaborators, as.numeric(df$uses_milestones))