adds this to git
'EDA' is Exploratory Data Analysis
This commit is contained in:
parent
d2ac88a6df
commit
a2fe06a31c
20
R/EDA.R
Normal file
20
R/EDA.R
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
|
||||||
|
library(dplyr)
|
||||||
|
# Load the data set into `df`.
# NOTE(review): the path is hard-coded under the author's home directory, so
# the script only runs on a machine with that layout — confirm before sharing.
df <- read.csv("~/Research/kkex_repo/power_data_110923.csv")
|
||||||
|
|
||||||
|
# Distribution checks ----

# Age rescaled by 365 (assumes `age` is recorded in days — TODO confirm).
hist(df$age / 365) # there's a big bump at 9 years, why?

# Raw counts are heavily right-skewed, so also look at them on a log scale.
# log1p(x) = log(1 + x) is used instead of log(x): log(0) is -Inf, and hist()
# silently discards non-finite values, which would hide every zero-count row.
hist(df$contributors) # skewed
hist(log1p(df$contributors)) # better
hist(df$collaborators) # skewed
hist(log1p(df$collaborators)) # better

hist(df$milestones)
|
||||||
|
|
||||||
|
# Dichotomous indicator ----
# One of many ways to generate a dichotomous variable: TRUE when the
# milestone count is non-zero, FALSE otherwise. A missing count yields
# FALSE (not NA) because of the explicit is.na() guard.
df$uses_milestones <- !is.na(df$milestones) & df$milestones != 0

# Frequency of FALSE vs. TRUE in the new indicator.
table(df$uses_milestones)
|
||||||
|
|
||||||
|
# Playing around ----
# Correlation tests (cor.test's default method, Pearson) between each count
# variable and the milestone indicator; as.numeric() turns FALSE/TRUE into 0/1.
cor.test(
  x = df$contributors,
  y = as.numeric(df$uses_milestones)
)
cor.test(
  x = df$collaborators,
  y = as.numeric(df$uses_milestones)
)
|
Loading…
Reference in New Issue
Block a user