513 lines
24 KiB
R
513 lines
24 KiB
R
geom_point() +
|
|
geom_smooth(mapping = aes(x=age_of_project, y=total_community))+
|
|
xlab("Age of the Project") +
|
|
ylab("Underproduction Factor")
|
|
g4
|
|
# Scatter plot of total community size against project age with a smoothed
# trend line. Both layers inherit the single x/y mapping given to ggplot(), so
# the points and the smoother share one column and one x scale.
# NOTE(review): later revisions of this plot divide age_of_project by 365 and
# label the axis in years; the /12 divisor is kept from this revision.
g4 <- ggplot(data7, aes(x = age_of_project / 12, y = total_community)) +
  geom_point() +
  geom_smooth() +
  xlab("Age of the Project") +
  # y is total_community, so label it as the community size
  ylab("Contributor Community Population")
g4
|
|
g4 <- ggplot(data7, aes(x=age_of_the_project, y=total_community)) +
|
|
geom_point() +
|
|
geom_smooth(mapping = aes(x=age_of_project, y=total_community))+
|
|
xlab("Age of the Project") +
|
|
ylab("Underproduction Factor")
|
|
g4
|
|
g4 <- ggplot(data7, aes(x=age_of_the_project, y=total_community)) +
|
|
geom_point() +
|
|
geom_smooth(mapping = aes(x=age_of_project, y=total_community))+
|
|
xlab("Age of the Project") +
|
|
ylab("Underproduction Factor")
|
|
g4
|
|
g4 <- ggplot(data7, aes(x=age_of_project/12, y=total_community)) +
|
|
geom_point() +
|
|
geom_smooth(mapping = aes(x=age_of_project, y=total_community))+
|
|
xlab("Age of the Proje") +
|
|
ylab("Underproduction Factor")
|
|
g4
|
|
g4 <- ggplot(data7, aes(x=age_of_project/12, y=total_community)) +
|
|
geom_point() +
|
|
geom_smooth(mapping = aes(x=age_of_project/12, y=total_community))+
|
|
xlab("Age of the Proje") +
|
|
ylab("Underproduction Factor")
|
|
g4
|
|
g4 <- ggplot(data7, aes(x=age_of_project/365, y=total_community)) +
|
|
geom_point() +
|
|
geom_smooth(mapping = aes(x=age_of_project/365, y=total_community))+
|
|
xlab("Age of the Proje") +
|
|
ylab("Underproduction Factor")
|
|
g4
|
|
g4 <- ggplot(data7, aes(x=age_of_project/365, y=total_community)) +
|
|
geom_point() +
|
|
geom_smooth(mapping = aes(x=age_of_project/365, y=total_community))+
|
|
xlab("Age of the Project (years)") +
|
|
ylab("Contributor Community Population")
|
|
g4
|
|
# Community size against project age (age_of_project / 365) with a smoothed
# trend line drawn in yellow.
g4 <- ggplot(data7, aes(x = age_of_project / 365, y = total_community)) +
  geom_point() +
  # A fixed colour is a constant layer parameter: it goes outside aes() and
  # must be a quoted colour name, not a bare symbol.
  geom_smooth(color = "yellow") +
  xlab("Age of the Project (years)") +
  ylab("Contributor Community Population")
g4
|
|
g4 <- ggplot(data7, aes(x=age_of_project/365, y=total_community)) +
|
|
geom_point() +
|
|
geom_smooth(mapping = aes(x=age_of_project/365, y=total_community))+
|
|
xlab("Age of the Project (years)") +
|
|
ylab("Contributor Community Population")
|
|
g4
|
|
g4 <- ggplot(data7, aes(x=age_of_project/365, y=total_community)) +
|
|
geom_point() +
|
|
geom_smooth(mapping = aes(x=age_of_project/365, y=total_community))+
|
|
xlab("Age of the Project (years)") +
|
|
ylab("Contributor Community Population") +
|
|
theme_bw()
|
|
g4
|
|
g4 <- ggplot(data7, aes(x=age_of_project/365, y=total_community)) +
|
|
geom_point() +
|
|
geom_smooth(mapping = aes(x=age_of_project/365, y=total_community), color="black")+
|
|
xlab("Age of the Project (years)") +
|
|
ylab("Contributor Community Population") +
|
|
theme_bw()
|
|
g4
|
|
g4 <- ggplot(data7, aes(x=age_of_project/365, y=total_community)) +
|
|
geom_point() +
|
|
geom_smooth(mapping = aes(x=age_of_project/365, y=total_community), color="yellow")+
|
|
xlab("Age of the Project (years)") +
|
|
ylab("Contributor Community Population") +
|
|
theme_bw()
|
|
g4
|
|
g4 <- ggplot(data7, aes(x=age_of_project/365, y=total_community)) +
|
|
geom_point() +
|
|
geom_smooth(mapping = aes(x=age_of_project/365, y=total_community), color="red")+
|
|
xlab("Age of the Project (years)") +
|
|
ylab("Contributor Community Population") +
|
|
theme_bw()
|
|
g4
|
|
library(readr)
|
|
# Read '../kk_final_commentlist.csv', add a total_community column as the sum
# of the contributors and collaborators columns, then report its median and
# its correlation with age_of_project.
data6 <- read_csv('../kk_final_commentlist.csv', show_col_types = FALSE)
data6[["total_community"]] <- data6[["contributors"]] + data6[["collaborators"]]
median(data6[["total_community"]])
cor.test(data6$total_community, data6$age_of_project)
|
|
library(readr)
|
|
library(ggplot2)
|
|
# Build the pilot data set, attach the mean underproduction factor looked up
# in the full package results, derive milestone/age covariates, and plot
# underproduction against MMT with one linear fit per age group.
data1 <- read_csv('../power_data_111023_mmt.csv', show_col_types = FALSE)
data2 <- read_csv('../inst_all_packages_full_results.csv', show_col_types = FALSE)

# Look each package up in data2 twice -- by lower-cased name and by
# "python-" + lower-cased name -- and keep the smaller non-missing value.
python_labeled <- as.numeric(data2$up.fac.mean[match(paste('python', tolower(data1$pkg), sep = "-"), data2$pkg)])
same_labeled <- as.numeric(data2$up.fac.mean[match(tolower(data1$pkg), data2$pkg)])
data1$up.fac.mean <- pmin(python_labeled, same_labeled, na.rm = TRUE)

data1$old_milestones <- data1$milestones
# 1 when milestones is 0 (or negative), 2 when it is positive
data1$new_milestones <- as.numeric(data1$milestones > 0) + 1
# Bucket index 1-4 from age / 365 cut at 9, 12, 15 and 17
data1$new.age <- as.numeric(cut(data1$age / 365, breaks = c(0, 9, 12, 15, 17), labels = c(1, 2, 3, 4)))
data1$new.formal.score <- data1$mmt / (data1$new_milestones / data1$new.age)
data1$new.age.factor <- as.factor(data1$new.age)

g2 <- ggplot(data1, aes(x = mmt, y = up.fac.mean)) +
  geom_point() +
  # x and y are inherited, so the fit uses the same up.fac.mean column as the points
  geom_smooth(mapping = aes(color = new.age.factor), method = 'lm', formula = y ~ x)
g2
|
|
# Reload the pilot data and the full package results, attach the mean
# underproduction factor, derive the covariates in dependency order, and plot
# underproduction against MMT with one linear fit per age group.
data1 <- read_csv('../power_data_111023_mmt.csv', show_col_types = FALSE)
data2 <- read_csv('../inst_all_packages_full_results.csv')
#d$nd <- to_logical(d$not.damaging, custom_true=c("Y"))
#levels(d$source) <- c("IP-based Editors", "New Editors", "Registered Editors", "Tor-based Editors")

# Look each package up in data2 twice -- by lower-cased name and by
# "python-" + lower-cased name -- and keep the smaller non-missing value.
python_labeled <- as.numeric(data2$up.fac.mean[match(paste('python', tolower(data1$pkg), sep = "-"), data2$pkg)])
same_labeled <- as.numeric(data2$up.fac.mean[match(tolower(data1$pkg), data2$pkg)])
data1$up.fac.mean <- pmin(python_labeled, same_labeled, na.rm = TRUE)

data1$old_milestones <- data1$milestones
data1$new_milestones <- as.numeric(data1$milestones > 0) + 1
# new.age has to exist before anything derived from it (score, factor)
data1$new.age <- as.numeric(cut(data1$age / 365, breaks = c(0, 9, 12, 15, 17), labels = c(1, 2, 3, 4)))
data1$new.formal.score <- data1$mmt / (data1$new_milestones / data1$new.age)
data1$new.age.factor <- as.factor(data1$new.age)

g2 <- ggplot(data1, aes(x = mmt, y = up.fac.mean)) +
  geom_point() +
  # x and y are inherited, so the fit uses the same up.fac.mean column as the points
  geom_smooth(mapping = aes(color = new.age.factor), method = 'lm', formula = y ~ x)
g2
|
|
# Reload the pilot data and the full package results, attach the mean
# underproduction factor, derive the formality scores and age groups, and plot
# underproduction against MMT with one linear fit per age group.
data1 <- read_csv('../power_data_111023_mmt.csv', show_col_types = FALSE)
data2 <- read_csv('../inst_all_packages_full_results.csv')
#d$nd <- to_logical(d$not.damaging, custom_true=c("Y"))
#levels(d$source) <- c("IP-based Editors", "New Editors", "Registered Editors", "Tor-based Editors")

# Look each package up in data2 twice -- by lower-cased name and by
# "python-" + lower-cased name -- and keep the smaller non-missing value.
python_labeled <- as.numeric(data2$up.fac.mean[match(paste('python', tolower(data1$pkg), sep = "-"), data2$pkg)])
same_labeled <- as.numeric(data2$up.fac.mean[match(tolower(data1$pkg), data2$pkg)])
data1$up.fac.mean <- pmin(python_labeled, same_labeled, na.rm = TRUE)

data1$old_milestones <- data1$milestones
data1$new_milestones <- as.numeric(data1$milestones > 0) + 1

# (2) - Run the model on the pilot data
# NOTE(review): old_milestones of 0 makes the denominator 0 and the score Inf/NaN.
data1$formal.score <- data1$mmt / (data1$old_milestones / data1$age)
data1$new.age <- as.numeric(cut(data1$age / 365, breaks = c(0, 9, 12, 15, 17), labels = c(1, 2, 3, 4)))
data1$new.formal.score <- data1$mmt / (data1$new_milestones / data1$new.age)
# Re-reading data1 above discarded any earlier derived columns, so the grouping
# factor used by the plot has to be rebuilt here.
data1$new.age.factor <- as.factor(data1$new.age)

g2 <- ggplot(data1, aes(x = mmt, y = up.fac.mean)) +
  geom_point() +
  geom_smooth(mapping = aes(color = new.age.factor), method = 'lm', formula = y ~ x)
g2
|
|
data1$new.age <- as.numeric(cut(data1$age/365, breaks=c(0,9,12,15,17), labels=c(1,2,3,4)))
|
|
data1$new.formal.score <- data1$mmt / (data1$new_milestones/data1$new.age)
|
|
data1$new.age.factor <- as.factor(data1$new.age)
|
|
g2 <- ggplot(data1, aes(x=mmt, y=up.fac.mean)) +
|
|
geom_point() +
|
|
geom_smooth(mapping = aes(x=mmt, y=up.fac.mean, color=new.age.factor),
|
|
method='lm', formula= y~x)
|
|
g2
|
|
g2 <- ggplot(data1, aes(x=mmt, y=up.fac.mean)) +
|
|
geom_point() +
|
|
geom_smooth(mapping = aes(x=mmt, y=up.fac.mean, color=new.age.factor),
|
|
method='lm', formula= y~x, se=FALSE)
|
|
g2
|
|
g2 <- ggplot(data1, aes(x=mmt, y=up.fac.mean)) +
|
|
geom_point() +
|
|
geom_smooth(mapping = aes(x=mmt, y=up.fac.mean, color=new.age.factor),
|
|
method='lm', formula= y~x, se=FALSE)+
|
|
xlab("MMT") +
|
|
ylab("Underproduction Factor") +
|
|
theme_bw()
|
|
g2
|
|
# Scatter of up.fac.mean against mmt with a reference line whose intercept and
# slope are the first two coefficients of mmtmodel1, plus error bars.
# NOTE(review): `y` and `yerr` in the error-bar mapping and `mmtmodel1` are not
# defined in the visible part of the session before this point -- confirm they
# exist in the workspace when this runs.
g <- ggplot(data = data1, mapping = aes(x = mmt, y = up.fac.mean)) +
  geom_point() +
  #geom_smooth( method="lm", formula=(y~x), colour = "orange")+
  geom_abline(
    slope = coef(mmtmodel1)[2],
    intercept = coef(mmtmodel1)[1],
    size = 1,
    colour = "orange"
  ) +
  geom_errorbar(mapping = aes(ymin = y - yerr, ymax = y + yerr), width = 0.09) +
  labs(color = "Project Age Group", y = "Mean Underproduction Factor", x = "MMT") +
  theme_bw()
g
|
|
g2 <- ggplot(data1, aes(x=mmt, y=up.fac.mean)) +
|
|
geom_point() +
|
|
geom_smooth(mapping = aes(x=mmt, y=up.fac.mean, color=new.age.factor),
|
|
method='lm', formula= y~x, se=FALSE)+
|
|
labs(x="MMT", y="Mean Underproduction Factor", color = "Project Age Group") +
|
|
theme_bw()
|
|
g2
|
|
# Underproduction against MMT with one linear fit (no confidence band) per
# project age group.
# The legend labels are set with scale_colour_discrete(), which keeps ggplot2's
# default palette and therefore needs no separately defined colour vector.
# NOTE(review): labels are matched to the factor levels by position; confirm
# all four age groups are present in data1.
g2 <- ggplot(data1, aes(x = mmt, y = up.fac.mean)) +
  geom_point() +
  geom_smooth(mapping = aes(color = new.age.factor),
              method = 'lm', formula = y ~ x, se = FALSE) +
  labs(x = "MMT", y = "Mean Underproduction Factor", color = "Project Age Group") +
  scale_colour_discrete(labels = c("0-9y", "9-12y", "12-15y","15-16y")) +
  theme_bw()
g2
|
|
g2 <- ggplot(data1, aes(x=mmt, y=up.fac.mean)) +
|
|
geom_point() +
|
|
geom_smooth(mapping = aes(x=mmt, y=up.fac.mean, color=new.age.factor),
|
|
method='lm', formula= y~x, se=FALSE)+
|
|
labs(x="MMT", y="Mean Underproduction Factor", color = "Project Age Group") +
|
|
scale_colour_manual(values=colors_legend, labels=c("0-9y", "9-12y", "12-15y","15-16y")) +
|
|
theme_bw() +
|
|
theme(legend.position = c(0.05, 0.05), legend.justification = c("left", "bottom"))
|
|
g2
|
|
g2 <- ggplot(data1, aes(x=mmt, y=up.fac.mean)) +
|
|
geom_smooth(mapping = aes(x=mmt, y=up.fac.mean, color=new.age.factor),
|
|
method='lm', formula= y~x, se=FALSE)+
|
|
labs(x="MMT", y="Mean Underproduction Factor", color = "Project Age Group") +
|
|
scale_colour_manual(values=colors_legend, labels=c("0-9y", "9-12y", "12-15y","15-16y")) +
|
|
theme_bw() +
|
|
theme(legend.position = c(0.05, 0.05), legend.justification = c("left", "bottom"))
|
|
g2
|
|
g2 <- ggplot(data1, aes(x=mmt, y=up.fac.mean)) +
|
|
geom_smooth(mapping = aes(x=mmt, y=up.fac.mean, color=new.age.factor),
|
|
method='lm', formula= y~x, se=FALSE)+
|
|
labs(x="MMT", y="Mean Underproduction Factor", color = "Project Age Group") +
|
|
scale_colour_manual(values=colors_legend, labels=c("0-9y", "9-12y", "12-15y","15-16y")) +
|
|
theme_bw() +
|
|
theme(legend.position = c(0.05, 0.05), legend.justification = c("left", "bottom"))
|
|
g2
|
|
g2 <- ggplot(data1, aes(x=mmt, y=up.fac.mean)) +
|
|
geom_smooth(mapping = aes(x=mmt, y=up.fac.mean, color=new.age.factor),
|
|
method='lm', formula= y~x, se=FALSE)+
|
|
labs(x="MMT", y="Mean Underproduction Factor", color = "Project Age Group") +
|
|
theme_bw() +
|
|
theme(legend.position = c(0.05, 0.05), legend.justification = c("left", "bottom"))
|
|
g2
|
|
g2 <- ggplot(data1, aes(x=mmt, y=up.fac.mean)) +
|
|
geom_smooth(mapping = aes(x=mmt, y=up.fac.mean, color=new.age.factor),
|
|
method='lm', formula= y~x, se=FALSE)+
|
|
labs(x="MMT", y="Mean Underproduction Factor", color = "Project Age Group") +
|
|
scale_colour_manual(values=color_legend, labels=c("0-9y", "9-12y", "12-15y","15-16y")) +
|
|
theme_bw() +
|
|
theme(legend.position = c(0.05, 0.05), legend.justification = c("left", "bottom"))
|
|
g2
|
|
# One linear fit (no confidence band) of underproduction on MMT per project
# age group, with the legend placed inside the lower-left corner of the panel.
# scale_colour_manual() cannot be used without a `values` vector, so the
# labels are attached with scale_colour_discrete(), which keeps the default
# palette.
# NOTE(review): labels are matched to the factor levels by position; confirm
# all four age groups are present in data1.
g2 <- ggplot(data1, aes(x = mmt, y = up.fac.mean)) +
  geom_smooth(mapping = aes(color = new.age.factor),
              method = 'lm', formula = y ~ x, se = FALSE) +
  labs(x = "MMT", y = "Mean Underproduction Factor", color = "Project Age Group") +
  scale_colour_discrete(labels = c("0-9y", "9-12y", "12-15y","15-16y")) +
  theme_bw() +
  theme(legend.position = c(0.05, 0.05), legend.justification = c("left", "bottom"))
g2
|
|
# One linear fit (no confidence band) of underproduction on MMT per project
# age group, with the legend placed inside the lower-left corner of the panel.
# `legend` is a function, not a colour vector, so it cannot serve as `values`;
# the labels are attached with scale_colour_discrete() instead, which keeps
# the default palette.
# NOTE(review): labels are matched to the factor levels by position; confirm
# all four age groups are present in data1.
g2 <- ggplot(data1, aes(x = mmt, y = up.fac.mean)) +
  geom_smooth(mapping = aes(color = new.age.factor),
              method = 'lm', formula = y ~ x, se = FALSE) +
  labs(x = "MMT", y = "Mean Underproduction Factor", color = "Project Age Group") +
  scale_colour_discrete(labels = c("0-9y", "9-12y", "12-15y","15-16y")) +
  theme_bw() +
  theme(legend.position = c(0.05, 0.05), legend.justification = c("left", "bottom"))
g2
|
|
g2 <- ggplot(data1, aes(x=mmt, y=up.fac.mean)) +
|
|
geom_smooth(mapping = aes(x=mmt, y=up.fac.mean, color=new.age.factor),
|
|
method='lm', formula= y~x, se=FALSE)+
|
|
labs(x="MMT", y="Mean Underproduction Factor", color = "Project Age Group") +
|
|
scale_colour_manual(values=legend.values, labels=c("0-9y", "9-12y", "12-15y","15-16y")) +
|
|
theme_bw() +
|
|
theme(legend.position = c(0.05, 0.05), legend.justification = c("left", "bottom"))
|
|
g2
|
|
g2 <- ggplot(data1, aes(x=mmt, y=up.fac.mean)) +
|
|
geom_smooth(mapping = aes(x=mmt, y=up.fac.mean, color=new.age.factor),
|
|
method='lm', formula= y~x, se=FALSE)+
|
|
labs(x="MMT", y="Mean Underproduction Factor", color = "Project Age Group") +
|
|
theme_bw() +
|
|
theme(legend.position = c(0.05, 0.05), legend.justification = c("left", "bottom"))
|
|
g2
|
|
data1$new.age.factor <- factor(data1$new.age, levels=c(1,2,3,4), labels=c("0-9y", "9-12y", "12-15y","15-16y"))
|
|
g2 <- ggplot(data1, aes(x=mmt, y=up.fac.mean)) +
|
|
geom_smooth(mapping = aes(x=mmt, y=up.fac.mean, color=new.age.factor),
|
|
method='lm', formula= y~x, se=FALSE)+
|
|
labs(x="MMT", y="Mean Underproduction Factor", color = "Project Age Group") +
|
|
theme_bw() +
|
|
theme(legend.position = c(0.05, 0.05), legend.justification = c("left", "bottom"))
|
|
g2
|
|
library(readr)
|
|
library(ggplot2)
|
|
library(tidyverse)
|
|
# Read '../final_data/kk_final_octo.csv', then report the median of
# underproduction_mean and how many rows have underproduction_low below zero.
data7 <- read_csv('../final_data/kk_final_octo.csv', show_col_types = FALSE)
median(data7[["underproduction_mean"]])
# Counting TRUEs while ignoring NA matches length(which(...))
sum(data7[["underproduction_low"]] < 0, na.rm = TRUE)
# Proportion typed in by hand during the session
364 / 3843
|
|
# Read the README roster from the final_data directory and count the rows
# whose underproduction_low is below zero.
# (The two earlier attempts used paths that do not point into final_data/ --
# one of them the malformed '..final_data/' -- and are dropped.)
data5 <- read_csv('../final_data/kk_final_readme_roster.csv', show_col_types = FALSE)
length(which(data5$underproduction_low < 0))
# Proportion typed in by hand during the session
227/2695
|
|
#primary analysis for cross-sectional community metrics
|
|
# Fix the RNG seed so any randomised step that follows is reproducible.
# The workspace is deliberately not wiped with rm(list = ls()): that call
# silently destroyed the data set read immediately before it and hides
# dependencies on session state. overall_data is read once the libraries
# below have been attached.
set.seed(424242)
|
|
library(readr)
|
|
library(ggplot2)
|
|
library(tidyverse)
|
|
#primary analysis for cross-sectional community metrics
|
|
overall_data <- read_csv('../final_data/deb_full_data.csv',show_col_types = FALSE)
|
|
octo_data <- read_csv('../final_data/deb_octo_data.csv', show_col_types = FALSE)
|
|
# MMT: collaborators are weighted 2 and contributors 1, divided by the combined
# head count. Every term reads from overall_data.
# NOTE(review): a row with zero contributors and zero collaborators gives
# 0 / 0 = NaN, which would also make the mean below NaN.
overall_data$mmt <- ((overall_data$collaborators * 2) + overall_data$contributors) /
  (overall_data$contributors + overall_data$collaborators)
mean(overall_data$mmt)
hist(overall_data$mmt, probability = TRUE)
|
|
# Bucket age_of_project / 365 into four groups (cut points 9, 12, 15, 17) and
# inspect how many projects land in each.
# NOTE(review): values outside (0, 17] fall outside every interval and become NA.
overall_data$new.age <- as.numeric(cut(overall_data$age_of_project / 365, breaks = c(0, 9, 12, 15, 17), labels = c(1, 2, 3, 4)))
table(overall_data$new.age)
overall_data$new.age.factor <- as.factor(overall_data$new.age)
hist(overall_data$new.age)
# hist() only accepts numeric input; a factor is shown as a bar chart of counts
barplot(table(overall_data$new.age.factor))
|
|
# Re-bucket age_of_project / 365 with cut points 7, 11, 13, 17 and inspect the
# group sizes. (The same three statements were run twice in a row; one pass is
# enough because the result is identical.)
# NOTE(review): values outside (0, 17] fall outside every interval and become NA.
overall_data$new.age <- as.numeric(cut(overall_data$age_of_project / 365, breaks = c(0, 7, 11, 13, 17), labels = c(1, 2, 3, 4)))
table(overall_data$new.age)
overall_data$new.age.factor <- as.factor(overall_data$new.age)
# hist() only accepts numeric input; a factor is shown as a bar chart of counts
barplot(table(overall_data$new.age.factor))
hist(overall_data$new.age)
|
|
overall_data$new.age <- as.numeric(cut(overall_data$age_of_project/365, breaks=c(0,8,11,13,17), labels=c(1,2,3,4)))
|
|
table(overall_data$new.age)
|
|
overall_data$new.age.factor <- as.factor(overall_data$new.age)
|
|
hist(overall_data$new.age)
|
|
overall_data$new.age <- as.numeric(cut(overall_data$age_of_project/365, breaks=c(0,8,11,15,17), labels=c(1,2,3,4)))
|
|
table(overall_data$new.age)
|
|
overall_data$new.age.factor <- as.factor(overall_data$new.age)
|
|
hist(overall_data$new.age)
|
|
overall_data$new.age <- as.numeric(cut(overall_data$age_of_project/365, breaks=c(0,8,11,14,17), labels=c(1,2,3,4)))
|
|
table(overall_data$new.age)
|
|
overall_data$new.age.factor <- as.factor(overall_data$new.age)
|
|
hist(overall_data$new.age)
|
|
overall_data$new.age <- as.numeric(cut(overall_data$age_of_project/365, breaks=c(0,7,11,14,17), labels=c(1,2,3,4)))
|
|
table(overall_data$new.age)
|
|
overall_data$new.age.factor <- as.factor(overall_data$new.age)
|
|
hist(overall_data$new.age)
|
|
overall_data$new.age <- as.numeric(cut(overall_data$age_of_project/365, breaks=c(0,7,10,14,17), labels=c(1,2,3,4)))
|
|
table(overall_data$new.age)
|
|
overall_data$new.age.factor <- as.factor(overall_data$new.age)
|
|
hist(overall_data$new.age)
|
|
overall_data$new.age <- as.numeric(cut(overall_data$age_of_project/365, breaks=c(0,7,10,13,17), labels=c(1,2,3,4)))
|
|
table(overall_data$new.age)
|
|
overall_data$new.age.factor <- as.factor(overall_data$new.age)
|
|
hist(overall_data$new.age)
|
|
# Inspect the distribution of project age (age_of_project / 365) and bucket
# the projects into four age groups.
age_vector <- overall_data$age_of_project / 365
order(age_vector)
quantile(age_vector)

# NOTE(review): the three inner cut points are presumably copied from the
# quantile() output above -- confirm. Values outside (0, 17] become NA.
overall_data$new.age <- as.numeric(cut(overall_data$age_of_project / 365, breaks = c(0, 7.524197, 10.323056, 13.649367, 17), labels = c(1, 2, 3, 4)))
table(overall_data$new.age)
overall_data$new.age.factor <- as.factor(overall_data$new.age)
hist(overall_data$new.age)

# Share of projects in each age group, typed in by hand during the session
1159/5105
1391/5105
1277/5105
1276/5105
|
|
#shows the cross-age downward slopes for all underproduction averages in the face of MMT
|
|
g3 <- ggplot(overall_data, aes(x=mmt, y=underproduction_mean)) +
|
|
geom_smooth(mapping = aes(x=mmt, y=underproduction_mean, color=new.age.factor),
|
|
method='lm', formula= y~x) +
|
|
xlab("MMT") +
|
|
ylab("Underproduction Factor") +
|
|
theme_bw()
|
|
g3
|
|
#shows the cross-age downward slopes for all underproduction averages in the face of MMT
|
|
g3 <- ggplot(overall_data, aes(x=mmt, y=underproduction_mean)) +
|
|
geom_point(mapping = aes(color=new.age.factor)) +
|
|
geom_smooth(mapping = aes(x=mmt, y=underproduction_mean, color=new.age.factor),
|
|
method='lm', formula= y~x) +
|
|
xlab("MMT") +
|
|
ylab("Underproduction Factor") +
|
|
theme_bw()
|
|
g3
|
|
#shows the cross-age downward slopes for all underproduction averages in the face of MMT
|
|
g3 <- ggplot(overall_data, aes(x=mmt, y=underproduction_mean)) +
|
|
geom_smooth(mapping = aes(x=mmt, y=underproduction_mean, color=new.age.factor),
|
|
method='lm', formula= y~x) +
|
|
xlab("MMT") +
|
|
ylab("Underproduction Factor") +
|
|
theme_bw()
|
|
g3
|
|
# Linear model of mean underproduction on MMT and the age-group factor, fitted
# on overall_data. The response is underproduction_mean, the column the
# overall_data plots in this script use.
mmtmodel1 <- lm(underproduction_mean ~ mmt + new.age.factor, data = overall_data)
summary(mmtmodel1)
|
|
#shows the cross-age downward slopes for all underproduction averages in the face of MMT
|
|
g3 <- ggplot(overall_data, aes(x=mmt, y=underproduction_mean)) +
|
|
geom_smooth(mapping = aes(x=mmt, y=underproduction_mean, color=new.age.factor),
|
|
method='lm', formula= y~x) +
|
|
xlab("MMT") +
|
|
ylab("Underproduction Factor") +
|
|
theme_bw() +
|
|
theme(legend.position = c(0.05, 0.05), legend.justification = c("left", "bottom"))
|
|
g3
|
|
#shows the cross-age downward slopes for all underproduction averages in the face of MMT
|
|
g3 <- ggplot(overall_data, aes(x=mmt, y=underproduction_mean)) +
|
|
geom_smooth(mapping = aes(x=mmt, y=underproduction_mean, color=new.age.factor), formula= y~x) +
|
|
xlab("MMT") +
|
|
ylab("Underproduction Factor") +
|
|
theme_bw() +
|
|
theme(legend.position = c(0.05, 0.05), legend.justification = c("left", "bottom"))
|
|
g3
|
|
g4 <- ggplot(overall_data, aes(x=mmt, y=underproduction_mean)) +
|
|
geom_smooth(mapping = aes(x=mmt, y=underproduction_mean, color=new.age.factor)) +
|
|
xlab("MMT") +
|
|
ylab("Underproduction Factor") +
|
|
theme_bw() +
|
|
theme(legend.position = c(0.05, 0.05), legend.justification = c("left", "bottom"))
|
|
g4
|
|
g4 <- ggplot(overall_data, aes(x=mmt, y=underproduction_mean)) +
|
|
geom_smooth(mapping = aes(x=mmt, y=underproduction_mean, color=new.age.factor)) +
|
|
xlab("MMT") +
|
|
ylab("Underproduction Factor") +
|
|
theme_bw() +
|
|
theme(legend.position = c(0.0, 0.0), legend.justification = c("left", "bottom"))
|
|
g4
|
|
g4 <- ggplot(overall_data, aes(x=mmt, y=underproduction_mean)) +
|
|
geom_smooth(mapping = aes(x=mmt, y=underproduction_mean, color=new.age.factor)) +
|
|
xlab("MMT") +
|
|
ylab("Underproduction Factor") +
|
|
theme_bw() +
|
|
theme(legend.position = c(0.0, 0.0), legend.justification = c("right", "top"))
|
|
g4
|
|
g4 <- ggplot(overall_data, aes(x=mmt, y=underproduction_mean)) +
|
|
geom_smooth(mapping = aes(x=mmt, y=underproduction_mean, color=new.age.factor)) +
|
|
xlab("MMT") +
|
|
ylab("Underproduction Factor") +
|
|
theme_bw() +
|
|
theme(legend.position = c(1.0, 1.0), legend.justification = c("right", "top"))
|
|
g4
|
|
g4 <- ggplot(overall_data, aes(x=mmt, y=underproduction_mean)) +
|
|
geom_smooth(mapping = aes(x=mmt, y=underproduction_mean, color=new.age.factor)) +
|
|
xlab("MMT") +
|
|
ylab("Underproduction Factor") +
|
|
theme_bw() +
|
|
theme(legend.position = c(0.9, 1.0), legend.justification = c("right", "top"))
|
|
g4
|
|
g4 <- ggplot(overall_data, aes(x=mmt, y=underproduction_mean)) +
|
|
geom_smooth(mapping = aes(x=mmt, y=underproduction_mean, color=new.age.factor)) +
|
|
xlab("MMT") +
|
|
ylab("Underproduction Factor") +
|
|
theme_bw() +
|
|
theme(legend.position = c(0.9, 0.9), legend.justification = c("right", "top"))
|
|
g4
|
|
g4 <- ggplot(overall_data, aes(x=mmt, y=underproduction_mean)) +
|
|
geom_smooth(mapping = aes(x=mmt, y=underproduction_mean, color=new.age.factor), se=FALSE) +
|
|
xlab("MMT") +
|
|
ylab("Underproduction Factor") +
|
|
theme_bw() +
|
|
theme(legend.position = c(0.9, 0.9), legend.justification = c("right", "top"))
|
|
g4
|
|
min(overall_data$underproduction_mean)
|
|
max(overall_data$underproduction_mean)
|
|
octo_data <- read_csv('../final_data/deb_octo_data.csv', show_col_types = FALSE)
|
|
octo_data$new.age <- as.numeric(cut(octo_data$age_of_project/365, breaks=c(0,7.524197,10.323056,13.649367,17), labels=c(1,2,3,4)))
|
|
table(octo_data$new.age)
|
|
999 / 3842
|
|
1139/3842
|
|
955/3842
|
|
747/3842
|
|
octo_data$new.age.factor <- as.factor(octo_data$new.age)
|
|
hist(overall_data$new.age)
|
|
hist(octo_data$new.age)
|
|
octo_data$mmt <- (((octo_data$collaborators * 2)+ octo_data$contributors) / (octo_data$contributors + octo_data$collaborators))
|
|
mean(overall_data$mmt)
|
|
mean(octo_data$mmt)
|
|
hist(octo_data$mmt, probability = TRUE)
|
|
head(octo_data)
|
|
# Issue-weighted MMT: issue contributors are weighted 2 and api, wiki and file
# contributors 1, divided by the sum of all four counts.
# (An earlier one-off definition that divided by api_contrib_count alone was
# overwritten immediately and is dropped.)
# NOTE(review): a row whose four counts are all zero gives NaN.
octo_data$issue_mmt <- ((octo_data$issue_contrib_count * 2) + octo_data$api_contrib_count +
  octo_data$wiki_contrib_count + octo_data$file_contrib_count) /
  (octo_data$api_contrib_count + octo_data$wiki_contrib_count +
    octo_data$issue_contrib_count + octo_data$file_contrib_count)
hist(octo_data$issue_mmt, probability = TRUE)
|
|
octo_mmtmodel1 <- lm(underproduction_mean ~ mmt + new.age.factor, data=octo_data)
|
|
summary(octo_mmtmodel1)
|
|
issue_mmtmodel1 <- lm(underproduction_mean ~ issue_mmt + new.age.factor, data=octo_data)
|
|
summary(issue_mmtmodel1)
|
|
# Wiki-weighted MMT, built the same way as issue_mmt: wiki contributors are
# weighted 2 and api, issue and file contributors 1, divided by the sum of all
# four counts. (Previously the numerator added wiki_contrib_count a third time
# and left issue_contrib_count out.)
# NOTE(review): a row whose four counts are all zero gives NaN.
octo_data$wiki_mmt <- ((octo_data$wiki_contrib_count * 2) + octo_data$api_contrib_count +
  octo_data$issue_contrib_count + octo_data$file_contrib_count) /
  (octo_data$api_contrib_count + octo_data$wiki_contrib_count +
    octo_data$issue_contrib_count + octo_data$file_contrib_count)
hist(octo_data$wiki_mmt, probability = TRUE)

# The wiki model must use wiki_mmt as its predictor, not issue_mmt.
wiki_mmtmodel1 <- lm(underproduction_mean ~ wiki_mmt + new.age.factor, data = octo_data)
summary(wiki_mmtmodel1)
|
|
# Regression tables for the MMT models.
# NOTE(review): texreg is not attached anywhere in the visible session;
# presumably powerAnalysis.R loads it -- confirm. It is sourced first so the
# helper code is available before the first table is built.
source('powerAnalysis.R') #my little "lib"

# Row labels keyed by the coefficient names lm() produces for these models.
# Mapping by name (instead of a positional custom.coef.names vector) keeps each
# label attached to the right coefficient even though the three models in a
# table use different MMT predictors.
mmt_coef_labels <- list(
  '(Intercept)' = '(Intercept)',
  'mmt' = 'MMT',
  'issue_mmt' = 'Issue MMT',
  'wiki_mmt' = 'Wiki MMT',
  'new.age.factor2' = 'Age-2',
  'new.age.factor3' = 'Age-3',
  'new.age.factor4' = 'Age-4'
)

# Overall-dataset MMT model next to the two octo-dataset variants; one column
# name per model.
texreg(list(mmtmodel1, issue_mmtmodel1, wiki_mmtmodel1), stars=NULL, digits=2,
       custom.model.names=c('MMT (Overall Dataset)', 'Issue MMT (Octo Dataset)', 'Wiki MMT (Octo Dataset)'),
       custom.coef.map=mmt_coef_labels,
       use.packages=FALSE, table=FALSE, ci.force = TRUE)

# The three models fitted on octo_data.
texreg(list(octo_mmtmodel1, issue_mmtmodel1, wiki_mmtmodel1), stars=NULL, digits=2,
       custom.model.names=c('M1: MMT', 'M2: issue MMT', 'M3: wiki MMT'),
       custom.coef.map=mmt_coef_labels,
       use.packages=FALSE, table=FALSE, ci.force = TRUE)

# Same as the first table, wrapped in a table environment.
texreg(list(mmtmodel1, issue_mmtmodel1, wiki_mmtmodel1), stars=NULL, digits=2,
       custom.model.names=c('MMT (Overall Dataset)', 'Issue MMT (Octo Dataset)', 'Wiki MMT (Octo Dataset)'),
       custom.coef.map=mmt_coef_labels,
       use.packages=FALSE, table=TRUE, ci.force = TRUE)
|
|
readme_data <- read_csv("../final_data/deb_readme_roster.csv", show_col_types = FALSE)
|
|
readme_data <- read_csv("../final_data/deb_readme_roster.csv", show_col_types = FALSE)
|
|
#below here is the analysis for the readme data
|
|
readme_data$new.age <- as.numeric(cut(readme_data$age_of_project/365, breaks=c(0,7.524197,10.323056,13.649367,17), labels=c(1,2,3,4)))
|
|
table(readme_data$new.age)
|
|
readme_data$new.age.factor <- as.factor(readme_data$new.age)
|
|
hist(readme_data$new.age)
|
|
637 / 2694
|
|
676 / 2694
|
|
725 / 2694
|
|
656 / 2694
|
|
contributing_data <- read_csv("../final_data/deb_contribfile_roster.csv", show_col_types = FALSE)

#below here is the analysis for the contributing.md files
# Bucket age_of_project / 365 with the same cut points used for the other data
# sets in this script and inspect the group sizes. All statements operate on
# contributing_data (a copy-pasted pass over readme_data under this heading
# only repeated the README analysis and is dropped).
# NOTE(review): values outside (0, 17] fall outside every interval and become NA.
contributing_data$new.age <- as.numeric(cut(contributing_data$age_of_project / 365, breaks = c(0, 7.524197, 10.323056, 13.649367, 17), labels = c(1, 2, 3, 4)))
table(contributing_data$new.age)
contributing_data$new.age.factor <- as.factor(contributing_data$new.age)
hist(contributing_data$new.age)
|
|
76/528
|
|
119 / 528
|
|
171/ 528
|
|
162 / 528
|