various R activity
BIN
R/.DS_Store
vendored
Normal file
1002
R/.Rhistory
BIN
R/0119-final-mmt.png
Normal file
After Width: | Height: | Size: 57 KiB |
BIN
R/630_0119_final.png
Normal file
After Width: | Height: | Size: 57 KiB |
@ -34,8 +34,9 @@ data1$new_milestones <- as.numeric(data1$milestones > 0) + 1
|
|||||||
data1$formal.score <- data1$mmt / (data1$old_milestones/data1$age)
|
data1$formal.score <- data1$mmt / (data1$old_milestones/data1$age)
|
||||||
table(data1$formal.score)
|
table(data1$formal.score)
|
||||||
hist(data1$old_mmt, prob=TRUE) #inequality of participation
|
hist(data1$old_mmt, prob=TRUE) #inequality of participation
|
||||||
hist(data1$formal.score)
|
median(data1$contributors)
|
||||||
hist(data1$age/365)
|
median(data1$collaborators)
|
||||||
|
median(data1$age/365)
|
||||||
data1$new_mmt <- data1$mmt - 1
|
data1$new_mmt <- data1$mmt - 1
|
||||||
hist(data1$new_mmt, prob=TRUE)
|
hist(data1$new_mmt, prob=TRUE)
|
||||||
|
|
||||||
@ -67,15 +68,35 @@ cor.test(data1$mmt, data1$up.fac.mean)
|
|||||||
cor.test(data1$milestones, data1$up.fac.mean)
|
cor.test(data1$milestones, data1$up.fac.mean)
|
||||||
cor.test(data1$age, data1$up.fac.mean)
|
cor.test(data1$age, data1$up.fac.mean)
|
||||||
|
|
||||||
|
data1$new.age.factor <- as.factor(data1$new.age)
|
||||||
|
#geom_abline(intercept=coef(mmtmodel1)[1], slope=coef(mmtmodel1)[2], colour = "orange")+
|
||||||
|
|
||||||
g <- ggplot(data1, aes(x=mmt, y=up.fac.mean)) +
|
g <- ggplot(data1, aes(x=mmt, y=up.fac.mean)) +
|
||||||
geom_point() +
|
geom_point() +
|
||||||
geom_smooth() +
|
#geom_smooth( method="lm", formula=(y~x), colour = "orange")+
|
||||||
|
geom_abline(intercept=coef(mmtmodel1)[1], slope=coef(mmtmodel1)[2], colour = "orange", size=1)+
|
||||||
|
geom_errorbar(aes(ymin=y-yerr, ymax=y+yerr), width=0.09)+
|
||||||
xlab("MMT") +
|
xlab("MMT") +
|
||||||
ylab("Underproduction Factor") +
|
ylab("Underproduction Factor") +
|
||||||
theme_bw()
|
theme_bw()
|
||||||
g
|
g
|
||||||
g
|
g
|
||||||
|
|
||||||
|
colors_legend <- c("a"="#E69F00","b"="#56B4E9", "c"="#D55E00","d"="#CC79A7")
|
||||||
|
#colors_legend <- c("0-9y"="red","9-12y"="green", "12-15y"="blue","15-16y"="orange")
|
||||||
|
g <- ggplot(data1, aes(x=mmt, y=up.fac.mean)) +
|
||||||
|
geom_point() +
|
||||||
|
geom_abline(aes(intercept=1.65, slope=-1.38, color="a"), size=1.5)+
|
||||||
|
geom_abline(aes(intercept=1.72, slope=-1.38, color="b"), size=1.5)+
|
||||||
|
geom_abline(aes(intercept=2.25, slope=-1.38, color="c" ), size=1.5)+
|
||||||
|
geom_abline(aes(intercept=2.8, slope=-1.38, color="d") , size=1.5)+
|
||||||
|
labs(x="MMT", y="Mean Underproduction Factor", color = "Project Age Group") +
|
||||||
|
scale_colour_manual(values=colors_legend, labels=c("0-9y", "9-12y", "12-15y","15-16y")) +
|
||||||
|
theme_bw()+
|
||||||
|
theme(legend.position = c(0.05, 0.05), legend.justification = c("left", "bottom"))
|
||||||
|
g
|
||||||
|
|
||||||
|
|
||||||
data2 <- subset(data1, (data1$age / 365) < 14 )
|
data2 <- subset(data1, (data1$age / 365) < 14 )
|
||||||
hist(floor(data2$age))
|
hist(floor(data2$age))
|
||||||
g <- ggplot(data2, aes(x=mmt, y=up.fac.mean)) +
|
g <- ggplot(data2, aes(x=mmt, y=up.fac.mean)) +
|
||||||
|
BIN
R/data_subset_agegroup.png
Normal file
After Width: | Height: | Size: 41 KiB |
BIN
R/final-mmt-plot.png
Normal file
After Width: | Height: | Size: 59 KiB |
BIN
R/final-mmt-underprod-final-last.png
Normal file
After Width: | Height: | Size: 60 KiB |
143
R/newAnalysis.R
Normal file
@ -0,0 +1,143 @@
|
|||||||
|
rm(list=ls())
|
||||||
|
set.seed(424242)
|
||||||
|
|
||||||
|
library(readr)
|
||||||
|
library(ggplot2)
|
||||||
|
|
||||||
|
data1 <- read_csv('../kk_final_expanded_data_final.csv',show_col_types = FALSE)
|
||||||
|
data2 <- read_csv('../kk_final_octo_data_total.csv',show_col_types = FALSE)
|
||||||
|
data3 <- read_csv('../kk_final_doclist_roster.csv',show_col_types = FALSE)
|
||||||
|
data4 <-read_csv('../kk_final_rosterslist.csv',show_col_types = FALSE)
|
||||||
|
#getting data subset metadata
|
||||||
|
|
||||||
|
head(data1)
|
||||||
|
head(data2)
|
||||||
|
head(data3)
|
||||||
|
head(data4)
|
||||||
|
|
||||||
|
length(which(data2$underproduction_low < 0))
|
||||||
|
mean(data2$underproduction_mean)
|
||||||
|
|
||||||
|
length(which(data1$underproduction_low < 0))
|
||||||
|
mean(data1$underproduction_mean)
|
||||||
|
|
||||||
|
length(which(data3$underproduction_low < 0))
|
||||||
|
mean(data3$underproduction_mean)
|
||||||
|
|
||||||
|
length(which(data4$underproduction_low < 0))
|
||||||
|
mean(data4$underproduction_mean)
|
||||||
|
|
||||||
|
data1$mmt <- (((data1$collaborators * 2)+ data1$contributors) / (data1$contributors + data1$collaborators)) - 1
|
||||||
|
mean(data1$mmt)
|
||||||
|
hist(data1$mmt, probability = TRUE)
|
||||||
|
|
||||||
|
data1$new.age <- as.numeric(cut(data1$age_of_project/365, breaks=c(0,9,12,15,17), labels=c(1,2,3,4)))
|
||||||
|
table(data1$new.age)
|
||||||
|
data1$new.age.factor <- as.factor(data1$new.age)
|
||||||
|
hist(data1$new.age)
|
||||||
|
age1 <- c(0.39369, 0.239271, 0.2096806, 0.1573584)
|
||||||
|
|
||||||
|
d1label <- rep("Overall", length(data1$new.age.factor))
|
||||||
|
d1per <- data1$new.age
|
||||||
|
d1per[d1per==1] <- 39.37
|
||||||
|
d1per[d1per==2] <- 23.93
|
||||||
|
d1per[d1per==3] <- 20.97
|
||||||
|
d1per[d1per==4] <- 15.74
|
||||||
|
d1per.factor<- as.factor(d1per)
|
||||||
|
|
||||||
|
data5 <- (d1label)
|
||||||
|
|
||||||
|
data2$new.age <- as.numeric(cut(data2$age_of_project/365, breaks=c(0,9,12,15,17), labels=c(1,2,3,4)))
|
||||||
|
table(data2$new.age)
|
||||||
|
data2$new.age.factor <- as.factor(data2$new.age)
|
||||||
|
hist(data2$new.age)
|
||||||
|
age2 <- c(0.5675676, 0.1981982, 0.1681682, 0.06606607)
|
||||||
|
|
||||||
|
d2label <- rep("Expanded Contrib.", length(data2$new.age.factor))
|
||||||
|
d2per <- data2$new.age
|
||||||
|
d2per[d2per==1] <- 56.76
|
||||||
|
d2per[d2per==2] <- 19.82
|
||||||
|
d2per[d2per==3] <- 16.82
|
||||||
|
d2per[d2per==4] <- 06.61
|
||||||
|
d2per.factor <- as.factor(d2per)
|
||||||
|
|
||||||
|
data3$new.age <- as.numeric(cut(data3$age_of_project/365, breaks=c(0,9,12,15,17), labels=c(1,2,3,4)))
|
||||||
|
table(data3$new.age)
|
||||||
|
data3$new.age.factor <- as.factor(data3$new.age)
|
||||||
|
hist(data3$new.age)
|
||||||
|
age3 <-c(0.2556818, 0.2954545, 0.2405303, 0.2083333)
|
||||||
|
d3label <- rep("Contrib. Files", length(data3$new.age.factor))
|
||||||
|
d3per <- data3$new.age
|
||||||
|
d3per[d3per==1] <- 25.57
|
||||||
|
d3per[d3per==2] <- 29.55
|
||||||
|
d3per[d3per==3] <- 24.05
|
||||||
|
d3per[d3per==4] <- 20.83
|
||||||
|
d3per.factor <- as.factor(d3per)
|
||||||
|
|
||||||
|
data4$new.age <- as.numeric(cut(data4$age_of_project/365, breaks=c(0,9,12,15,17), labels=c(1,2,3,4)))
|
||||||
|
table(data4$new.age)
|
||||||
|
data4$new.age.factor <- as.factor(data4$new.age)
|
||||||
|
hist(data4$new.age)
|
||||||
|
age4 <- c(0.5, 0.125, 0.125, 0.25)
|
||||||
|
d4label <- rep("Contrib. Rosters", length(data4$new.age.factor))
|
||||||
|
d4per <- data4$new.age
|
||||||
|
d4per[d4per==1] <- 57.14
|
||||||
|
d4per[d4per==2] <- 14.29
|
||||||
|
d4per[d4per==3] <- 14.29
|
||||||
|
d4per[d4per==4] <- 28.57
|
||||||
|
d4per.factor <- as.factor(d4per)
|
||||||
|
|
||||||
|
all_per <- c(d1per.factor, d2per.factor, d3per.factor, d4per.factor)
|
||||||
|
all_persss <- c(d1per, d2per, d3per, d4per)
|
||||||
|
all_labels <- c(d1label, d2label, d3label, d4label)
|
||||||
|
all_age_groups <- c(data1$new.age.factor, data2$new.age.factor, data3$new.age.factor, data4$new.age.factor)
|
||||||
|
|
||||||
|
d5 <- data.frame(labels = all_labels,
|
||||||
|
age_groups = all_age_groups,
|
||||||
|
per = all_per,
|
||||||
|
persss = all_persss)
|
||||||
|
d5 <- na.omit(d5)
|
||||||
|
|
||||||
|
g <- ggplot(d5, aes(fill=forcats::fct_rev(age_groups), y = 1, x=forcats::fct_rev(labels))) +
|
||||||
|
geom_bar(position="fill", stat="identity") +
|
||||||
|
scale_fill_discrete(name = "Project Age Group", labels = c("15-16y", "12-15y", "9-12y", "0-9y"), guide = guide_legend(reverse = TRUE)) +
|
||||||
|
xlab("Dataset") +
|
||||||
|
ylab("Age Grouping Percentage") +
|
||||||
|
theme_bw()+
|
||||||
|
theme(axis.text.x = element_text(angle = 0), legend.position="top")
|
||||||
|
g
|
||||||
|
|
||||||
|
sdata1$new_milestones <- as.numeric(data1$milestone_count > 0) + 1
|
||||||
|
data1$new.formal.score <- data1$mmt / (data1$new_milestones/data1$new.age)
|
||||||
|
|
||||||
|
mmtmodel1 <- lm(underproduction_mean ~ mmt + as.factor(new.age), data=data1)
|
||||||
|
summary(mmtmodel1)
|
||||||
|
|
||||||
|
agemodel1 <- lm(mmt ~ age_of_project, data=data1)
|
||||||
|
summary(agemodel1)
|
||||||
|
|
||||||
|
fsmodel2 <- lm(underproduction_mean ~ new.formal.score, data=data1)
|
||||||
|
summary(fsmodel2)
|
||||||
|
|
||||||
|
g <- ggplot(data1, aes(x=mmt, y=underproduction_mean)) +
|
||||||
|
geom_point() +
|
||||||
|
geom_smooth(method='lm', formula= y~x) +
|
||||||
|
xlab("MMT") +
|
||||||
|
ylab("Underproduction Factor") +
|
||||||
|
theme_bw()
|
||||||
|
g
|
||||||
|
|
||||||
|
#shows the cross-age downward slopes for all underproduction averages in the face of MMT
|
||||||
|
g3 <- ggplot(data1, aes(x=mmt, y=underproduction_mean)) +
|
||||||
|
geom_smooth(mapping = aes(x=mmt, y=underproduction_mean, color=new.age.factor),
|
||||||
|
method='lm', formula= y~x) +
|
||||||
|
xlab("MMT") +
|
||||||
|
ylab("Underproduction Factor") +
|
||||||
|
theme_bw()
|
||||||
|
g3
|
||||||
|
|
||||||
|
cor.test(data1$mmt, data1$new.age)
|
||||||
|
age_data <- subset(data1, !is.na(new.age))
|
||||||
|
g2 <- ggplot(age_data, aes(x=factor(new.age), y=mmt))+
|
||||||
|
geom_boxplot()
|
||||||
|
g2
|
BIN
R/new_mmt_underprod_plot.png
Normal file
After Width: | Height: | Size: 47 KiB |
Before Width: | Height: | Size: 65 KiB After Width: | Height: | Size: 50 KiB |
9
R/plotting_age.R
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
rm(list=ls())
|
||||||
|
set.seed(424242)
|
||||||
|
|
||||||
|
library(readr)
|
||||||
|
library(ggplot2)
|
||||||
|
data1 <- read_csv('../age_percentages.csv',show_col_types = FALSE)
|
||||||
|
|
||||||
|
head(data1)
|
||||||
|
|
BIN
R/temp-mmt-colors.png
Normal file
After Width: | Height: | Size: 49 KiB |
BIN
R/temp-temp.png
Normal file
After Width: | Height: | Size: 53 KiB |
5
age_percentages.csv
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
setname,age1,age2,age3,age4
|
||||||
|
data1,0.39369,0.239271,0.2096806,0.1573584
|
||||||
|
data2,0.5675676,0.1981982,0.1681682,0.06606607
|
||||||
|
data3,0.2556818,0.2954545,0.2405303,0.2083333
|
||||||
|
data4,0.5714286,0.1428571,0.1428571,0.2857143
|
|
@ -1,8 +0,0 @@
|
|||||||
{\rtf1\ansi\ansicpg1252\cocoartf2708
|
|
||||||
\cocoatextscaling0\cocoaplatform0{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
|
|
||||||
{\colortbl;\red255\green255\blue255;}
|
|
||||||
{\*\expandedcolortbl;;}
|
|
||||||
\margl1440\margr1440\vieww11520\viewh8400\viewkind0
|
|
||||||
\pard\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx7920\tx8640\pardirnatural\partightenfactor0
|
|
||||||
|
|
||||||
\f0\fs24 \cf0 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}
|
|