expanding matching for data
This commit is contained in:
parent
9e6c28de9f
commit
7bcf7ac4f8
2
R/EDA.R
2
R/EDA.R
@ -36,7 +36,7 @@ t.test(df1$up.fac.mean)
|
|||||||
# -0.1961401 -0.1647757
|
# -0.1961401 -0.1647757
|
||||||
df$mmt <- (df$contributors + (2 * df$collaborators)) / (df$contributors + df$collaborators)
|
df$mmt <- (df$contributors + (2 * df$collaborators)) / (df$contributors + df$collaborators)
|
||||||
df$old_mmt <- (df$contributors) / (df$contributors + df$collaborators)
|
df$old_mmt <- (df$contributors) / (df$contributors + df$collaborators)
|
||||||
t.test(df$mmt)
|
t.test(df$old_mmt)
|
||||||
# 95 percent confidence interval:
|
# 95 percent confidence interval:
|
||||||
# 1.610638 1.684438
|
# 1.610638 1.684438
|
||||||
#
|
#
|
@ -24,17 +24,20 @@ data1 <- read_csv('../power_data_111023_mmt.csv',show_col_types = FALSE)
|
|||||||
data2 <- read_csv('../inst_all_packages_full_results.csv')
|
data2 <- read_csv('../inst_all_packages_full_results.csv')
|
||||||
#d$nd <- to_logical(d$not.damaging, custom_true=c("Y"))
|
#d$nd <- to_logical(d$not.damaging, custom_true=c("Y"))
|
||||||
#levels(d$source) <- c("IP-based Editors", "New Editors", "Registered Editors", "Tor-based Editors")
|
#levels(d$source) <- c("IP-based Editors", "New Editors", "Registered Editors", "Tor-based Editors")
|
||||||
|
python_labeled <- as.numeric(data2$up.fac.mean[match(paste('python',tolower(data1$pkg), sep = "-"), data2$pkg)])
|
||||||
data1$up.fac.mean <- as.numeric(data2$up.fac.mean[match(data1$pkg, data2$pkg)])
|
same_labeled <- as.numeric(data2$up.fac.mean[match(tolower(data1$pkg), data2$pkg)])
|
||||||
data1$milestones <- as.numeric(data1$milestones > 0) + 1
|
data1$up.fac.mean <- pmin(python_labeled, same_labeled, na.rm=TRUE)
|
||||||
|
data1$milestones <- as.numeric(data1$milestones > 0)
|
||||||
# (2) - Run the model on the pilot data
|
# (2) - Run the model on the pilot data
|
||||||
data1$formal.score <- data1$mmt / (data1$milestones/data1$age)
|
data1$formal.score <- data1$mmt / (data1$milestones/data1$age)
|
||||||
table(data1$milestones)
|
table(data1$milestones)
|
||||||
hist(data1$mmt) #inequality of participation
|
hist(data1$old_mmt) #inequality of participation
|
||||||
hist(data1$formal.score)
|
hist(data1$formal.score)
|
||||||
hist(data1$age/365)
|
hist(data1$age/365)
|
||||||
kmodel1 <- lm(up.fac.mean ~ mmt, data=data1)
|
kmodel1 <- lm(up.fac.mean ~ mmt, data=data1)
|
||||||
summary(kmodel1)
|
summary(kmodel1)
|
||||||
|
kmodel1 <- lm(up.fac.mean ~ old_mmt, data=data1)
|
||||||
|
summary(kmodel1)
|
||||||
kmodel1 <- lm(up.fac.mean ~ formal.score, data=data1)
|
kmodel1 <- lm(up.fac.mean ~ formal.score, data=data1)
|
||||||
summary(kmodel1)
|
summary(kmodel1)
|
||||||
hist(data1$formal.score)
|
hist(data1$formal.score)
|
||||||
@ -48,7 +51,7 @@ g <- ggplot(data1, aes(x=formal.score, y=up.fac.mean)) +
|
|||||||
geom_smooth()
|
geom_smooth()
|
||||||
g
|
g
|
||||||
|
|
||||||
data2 <- subset(data1, (data1$age / 365) < 9 )
|
data2 <- subset(data1, (data1$age / 365) < 14 )
|
||||||
hist(data2$age)
|
hist(data2$age)
|
||||||
g <- ggplot(data2, aes(x=formal.score, y=up.fac.mean)) +
|
g <- ggplot(data2, aes(x=formal.score, y=up.fac.mean)) +
|
||||||
geom_point() +
|
geom_point() +
|
||||||
|
Loading…
Reference in New Issue
Block a user