24_deb_pkg_gov/R/EDA.R
Kaylea Champion a2fe06a31c adds this to git
'EDA' is Exploratory Data Analysis
2023-11-09 10:10:40 -08:00

21 lines
607 B
R

library(dplyr)
# ---- Load data ----
df <- read.csv("~/Research/kkex_repo/power_data_110923.csv")

# ---- Distributions ----
# age is divided by 365, presumably converting days to years -- TODO confirm.
# Open question: there's a big bump at 9 years, why?
hist(df$age / 365)

# The raw counts are skewed; their logs are easier to read.
hist(df$contributors)        # skewed
hist(log(df$contributors))   # better
hist(df$collaborators)       # skewed
hist(log(df$collaborators))  # better

hist(df$milestones)

# ---- Dichotomous milestone indicator ----
# One of many ways to generate a dichotomous variable: TRUE where milestones
# is non-zero, FALSE otherwise (a missing comparison also falls back to FALSE).
df$uses_milestones <- coalesce(df$milestones != 0, FALSE)
table(df$uses_milestones)

# ---- Playing around ----
# Correlate each count with the indicator, which as.numeric() codes as 1/0.
cor.test(df$contributors, as.numeric(df$uses_milestones))
cor.test(df$collaborators, as.numeric(df$uses_milestones))