24_deb_pkg_gov/R/stale_scripts/EDA.R

42 lines
1.2 KiB
R
Raw Permalink Normal View History

library(dplyr)
2023-12-05 15:46:31 +00:00
# Load the two CSV inputs; both paths are relative to the working directory.
# `df` supplies the age, contributors, collaborators and milestones columns
# used below; `df1` supplies up.fac.mean.
df <- read.csv(file = "../power_data_111023_mmt.csv")
df1 <- read.csv(file = "../inst_all_packages_full_results.csv")
# Quick distribution checks on columns of `df`.
# hist() derives its default title and x-axis label from the argument
# expression, so each call is kept exactly as written.
# age is divided by 365 and read as years in the note on the first plot.
hist(df$age/365) #there's a big bump at 9 years, why?
# Raw counts are skewed, so each is also plotted on a natural-log scale.
# NOTE(review): log() of a zero count is -Inf; check whether zero counts exist
# and how hist() treats them before relying on the logged plots.
hist(df$contributors) #skewed
hist(log(df$contributors)) #better
hist(df$collaborators) #skewed
hist(log(df$collaborators)) #better
hist(df$milestones)
# One of many ways to build a dichotomous variable: TRUE when the milestone
# count is non-zero, FALSE otherwise. NA != 0 evaluates to NA, so the is.na()
# guard makes rows with a missing count FALSE rather than NA.
df$uses_milestones <- !is.na(df$milestones) & df$milestones != 0
# Tally of FALSE / TRUE rows.
table(df$uses_milestones)
#playing around
# Exploratory correlation tests (Pearson, the cor.test() default). The logical
# uses_milestones flag is converted with as.numeric(), so TRUE/FALSE enter the
# test as 1/0.
cor.test(df$contributors, as.numeric(df$uses_milestones))
cor.test(df$collaborators, as.numeric(df$uses_milestones))
# Ratio of contributors to collaborators against age / 365.
# NOTE(review): the ratio is Inf or NaN on any row where collaborators is 0;
# check whether such rows exist before reading this result.
cor.test(df$contributors / df$collaborators, df$age / 365)
# One-sample t-tests (t.test() on a single sample tests the mean against
# mu = 0 by default). Beneath each call is the 95 percent confidence interval
# for the mean; the figures appear to be pasted from an earlier run and are
# not recomputed here, so re-run after any change to the input files.
t.test(df$age)
#95 percent confidence interval:
# 2793.638 3066.417
t.test(df$contributors)
#95 percent confidence interval:
# 27.6519 154.6866
t.test(df$collaborators)
#95 percent confidence interval:
# 50.01884 96.77090
# This one uses the second data set (df1), column up.fac.mean.
t.test(df1$up.fac.mean)
#95 percent confidence interval:
# -0.1961401 -0.1647757
# mmt = (contributors + 2 * collaborators) / (contributors + collaborators),
# which is algebraically 1 + collaborators / (contributors + collaborators).
# Rows where both counts are 0 give NaN (0 / 0).
df$mmt <- with(
  df,
  (contributors + 2 * collaborators) / (contributors + collaborators)
)
2023-11-10 21:38:49 +00:00
# old_mmt: contributors as a share of contributors plus collaborators.
# Rows where both counts are 0 give NaN (0 / 0).
df$old_mmt <- with(df, contributors / (contributors + collaborators))
2023-11-13 16:52:40 +00:00
# One-sample t-test on old_mmt (mean against the default mu = 0).
# NOTE(review): the interval recorded below does not look like it belongs to
# old_mmt. As assigned in this script, old_mmt is
# contributors / (contributors + collaborators), which cannot exceed 1 for
# non-negative counts, whereas 1.61-1.68 falls inside the [1, 2] range of
# df$mmt. Presumably the figures were recorded for the other formula; re-run
# and update the recorded output.
t.test(df$old_mmt)
# 95 percent confidence interval:
# 1.610638 1.684438
#