various R activity
							
								
								
									
										
											BIN
										
									
								
								R/.DS_Store
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
							
								
								
									
										1002
									
								
								R/.Rhistory
									
									
									
									
									
								
							
							
						
						
							
								
								
									
										
											BIN
										
									
								
								R/0119-final-mmt.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						| After Width: | Height: | Size: 57 KiB | 
							
								
								
									
										
											BIN
										
									
								
								R/630_0119_final.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						| After Width: | Height: | Size: 57 KiB | 
| @ -34,8 +34,9 @@ data1$new_milestones <- as.numeric(data1$milestones > 0) + 1 | |||||||
| data1$formal.score <- data1$mmt / (data1$old_milestones/data1$age) | data1$formal.score <- data1$mmt / (data1$old_milestones/data1$age) | ||||||
| table(data1$formal.score) | table(data1$formal.score) | ||||||
| hist(data1$old_mmt, prob=TRUE) #inequality of participation | hist(data1$old_mmt, prob=TRUE) #inequality of participation | ||||||
| hist(data1$formal.score) | median(data1$contributors) | ||||||
| hist(data1$age/365) | median(data1$collaborators) | ||||||
|  | median(data1$age/365) | ||||||
| data1$new_mmt <- data1$mmt - 1 | data1$new_mmt <- data1$mmt - 1 | ||||||
| hist(data1$new_mmt, prob=TRUE) | hist(data1$new_mmt, prob=TRUE) | ||||||
| 
 | 
 | ||||||
| @ -67,15 +68,35 @@ cor.test(data1$mmt, data1$up.fac.mean) | |||||||
| cor.test(data1$milestones, data1$up.fac.mean) | cor.test(data1$milestones, data1$up.fac.mean) | ||||||
| cor.test(data1$age, data1$up.fac.mean) | cor.test(data1$age, data1$up.fac.mean) | ||||||
| 
 | 
 | ||||||
|  | data1$new.age.factor <- as.factor(data1$new.age) | ||||||
|  | #geom_abline(intercept=coef(mmtmodel1)[1], slope=coef(mmtmodel1)[2], colour = "orange")+ | ||||||
|  | 
 | ||||||
| g <- ggplot(data1, aes(x=mmt, y=up.fac.mean)) + | g <- ggplot(data1, aes(x=mmt, y=up.fac.mean)) + | ||||||
|   geom_point() + |   geom_point() + | ||||||
|   geom_smooth() + |   #geom_smooth( method="lm", formula=(y~x), colour = "orange")+ | ||||||
|  |   geom_abline(intercept=coef(mmtmodel1)[1], slope=coef(mmtmodel1)[2], colour = "orange", size=1)+ | ||||||
|  |   geom_errorbar(aes(ymin=y-yerr, ymax=y+yerr), width=0.09)+ | ||||||
|   xlab("MMT") + |   xlab("MMT") + | ||||||
|   ylab("Underproduction Factor") + |   ylab("Underproduction Factor") + | ||||||
|   theme_bw()  |   theme_bw()  | ||||||
| g | g | ||||||
| g | g | ||||||
| 
 | 
 | ||||||
|  | colors_legend <- c("a"="#E69F00","b"="#56B4E9", "c"="#D55E00","d"="#CC79A7") | ||||||
|  | #colors_legend <- c("0-9y"="red","9-12y"="green", "12-15y"="blue","15-16y"="orange") | ||||||
|  | g <- ggplot(data1, aes(x=mmt, y=up.fac.mean)) + | ||||||
|  |   geom_point() + | ||||||
|  |   geom_abline(aes(intercept=1.65, slope=-1.38, color="a"), size=1.5)+ | ||||||
|  |   geom_abline(aes(intercept=1.72, slope=-1.38, color="b"), size=1.5)+ | ||||||
|  |   geom_abline(aes(intercept=2.25, slope=-1.38, color="c" ), size=1.5)+ | ||||||
|  |   geom_abline(aes(intercept=2.8, slope=-1.38, color="d") , size=1.5)+ | ||||||
|  |   labs(x="MMT", y="Mean Underproduction Factor", color = "Project Age Group") + | ||||||
|  |   scale_colour_manual(values=colors_legend, labels=c("0-9y", "9-12y", "12-15y","15-16y")) + | ||||||
|  |   theme_bw()+ | ||||||
|  |   theme(legend.position = c(0.05, 0.05), legend.justification = c("left", "bottom")) | ||||||
|  | g | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| data2 <- subset(data1, (data1$age / 365) < 14 ) | data2 <- subset(data1, (data1$age / 365) < 14 ) | ||||||
| hist(floor(data2$age)) | hist(floor(data2$age)) | ||||||
| g <- ggplot(data2, aes(x=mmt, y=up.fac.mean)) + | g <- ggplot(data2, aes(x=mmt, y=up.fac.mean)) + | ||||||
|  | |||||||
							
								
								
									
										
											BIN
										
									
								
								R/data_subset_agegroup.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						| After Width: | Height: | Size: 41 KiB | 
							
								
								
									
										
											BIN
										
									
								
								R/final-mmt-plot.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						| After Width: | Height: | Size: 59 KiB | 
							
								
								
									
										
											BIN
										
									
								
								R/final-mmt-underprod-final-last.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						| After Width: | Height: | Size: 60 KiB | 
							
								
								
									
										143
									
								
								R/newAnalysis.R
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @ -0,0 +1,143 @@ | |||||||
# Analysis of project team structure ("MMT") against underproduction for
# four project datasets.

# Start from an empty workspace with a fixed RNG seed.
rm(list = ls())
set.seed(424242)

library(readr)
library(ggplot2)

# Load the four datasets; paths are resolved against the working directory.
data1 <- read_csv("../kk_final_expanded_data_final.csv", show_col_types = FALSE)
data2 <- read_csv("../kk_final_octo_data_total.csv", show_col_types = FALSE)
data3 <- read_csv("../kk_final_doclist_roster.csv", show_col_types = FALSE)
data4 <- read_csv("../kk_final_rosterslist.csv", show_col_types = FALSE)

# Preview each dataset.
head(data1)
head(data2)
head(data3)
head(data4)

# Per dataset: how many rows have a negative `underproduction_low`, and the
# mean of `underproduction_mean`. (Order kept as data2, data1, data3, data4.)
sum(data2$underproduction_low < 0, na.rm = TRUE)
mean(data2$underproduction_mean)

sum(data1$underproduction_low < 0, na.rm = TRUE)
mean(data1$underproduction_mean)

sum(data3$underproduction_low < 0, na.rm = TRUE)
mean(data3$underproduction_mean)

sum(data4$underproduction_low < 0, na.rm = TRUE)
mean(data4$underproduction_mean)

# MMT: collaborators are weighted 2 and contributors 1, averaged over all
# people, then shifted down by 1. Algebraically this is the fraction of
# people who are collaborators.
data1$mmt <-
  ((data1$collaborators * 2 + data1$contributors) /
    (data1$contributors + data1$collaborators)) - 1
mean(data1$mmt)
hist(data1$mmt, probability = TRUE)
|  | 
 | ||||||
# ---- Age groups and their share of each dataset ----
# Project age (in days) is converted to years and binned into four groups:
# (0, 9], (9, 12], (12, 15], (15, 17]. Ages outside those bins become NA.
age_breaks <- c(0, 9, 12, 15, 17)

# Map project ages in days to age-group codes 1-4 (NA outside the breaks).
age_group <- function(age_days) {
  as.numeric(cut(age_days / 365, breaks = age_breaks, labels = c(1, 2, 3, 4)))
}

# Fraction of the non-missing projects that fall in each of the four groups.
# Always returns four values, in group order, even if a group is empty.
age_share <- function(groups) {
  counts <- table(factor(groups, levels = 1:4))
  as.numeric(counts) / sum(counts)
}

# For every project, the percentage (2 decimals) of its dataset that shares
# its age group. Looking the value up by group code avoids the collisions
# that chained `x[x == k] <- pct` recoding can produce, and keeps NA as NA.
age_percent <- function(groups, shares) {
  round(100 * shares, 2)[groups]
}

# The shares below are derived from the data instead of being typed in by
# hand: the hand-entered dataset-4 percentages (57.14/14.29/14.29/28.57)
# summed to more than 100 and disagreed with the hand-entered `age4`.

# Dataset 1: "Overall"
data1$new.age <- age_group(data1$age_of_project)
table(data1$new.age)
data1$new.age.factor <- as.factor(data1$new.age)
hist(data1$new.age)
age1 <- age_share(data1$new.age)
d1label <- rep("Overall", length(data1$new.age.factor))
d1per <- age_percent(data1$new.age, age1)
d1per.factor <- as.factor(d1per)

# Not referenced again in this script; kept for interactive use.
data5 <- (d1label)

# Dataset 2: "Expanded Contrib."
data2$new.age <- age_group(data2$age_of_project)
table(data2$new.age)
data2$new.age.factor <- as.factor(data2$new.age)
hist(data2$new.age)
age2 <- age_share(data2$new.age)
d2label <- rep("Expanded Contrib.", length(data2$new.age.factor))
d2per <- age_percent(data2$new.age, age2)
d2per.factor <- as.factor(d2per)

# Dataset 3: "Contrib. Files"
data3$new.age <- age_group(data3$age_of_project)
table(data3$new.age)
data3$new.age.factor <- as.factor(data3$new.age)
hist(data3$new.age)
age3 <- age_share(data3$new.age)
d3label <- rep("Contrib. Files", length(data3$new.age.factor))
d3per <- age_percent(data3$new.age, age3)
d3per.factor <- as.factor(d3per)

# Dataset 4: "Contrib. Rosters"
data4$new.age <- age_group(data4$age_of_project)
table(data4$new.age)
data4$new.age.factor <- as.factor(data4$new.age)
hist(data4$new.age)
age4 <- age_share(data4$new.age)
d4label <- rep("Contrib. Rosters", length(data4$new.age.factor))
d4per <- age_percent(data4$new.age, age4)
d4per.factor <- as.factor(d4per)

# Stack the four datasets into one long table: one row per project, with
# its dataset label, age group, and that group's percentage.
all_per <- c(d1per.factor, d2per.factor, d3per.factor, d4per.factor)
all_persss <- c(d1per, d2per, d3per, d4per)
all_labels <- c(d1label, d2label, d3label, d4label)
all_age_groups <- c(
  data1$new.age.factor,
  data2$new.age.factor,
  data3$new.age.factor,
  data4$new.age.factor
)

d5 <- data.frame(
  labels = all_labels,
  age_groups = all_age_groups,
  per = all_per,
  persss = all_persss
)
# Drop projects whose age fell outside the bins (NA age group).
d5 <- na.omit(d5)
|  | 
 | ||||||
# Stacked bar per dataset, filled to 100%, split by age group. Every row
# contributes y = 1, so bar segments are proportional to project counts.
# NOTE(review): the top bin is cut at 17 years but labelled "15-16y" - confirm.
g <- ggplot(
  d5,
  aes(
    fill = forcats::fct_rev(age_groups),
    y = 1,
    x = forcats::fct_rev(labels)
  )
) +
  geom_bar(position = "fill", stat = "identity") +
  scale_fill_discrete(
    name = "Project Age Group",
    labels = c("15-16y", "12-15y", "9-12y", "0-9y"),
    guide = guide_legend(reverse = TRUE)
  ) +
  labs(x = "Dataset", y = "Age Grouping Percentage") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 0), legend.position = "top")
g
|  | 
 | ||||||
# Milestone indicator: 1 when the project has no milestones, 2 when it has
# at least one. (Assigned to `data1`; the previous `sdata1` target was an
# undefined object, so the script stopped here.)
data1$new_milestones <- as.numeric(data1$milestone_count > 0) + 1

# Formal score: MMT divided by (milestone indicator / age-group code).
data1$new.formal.score <- data1$mmt / (data1$new_milestones / data1$new.age)

# Underproduction explained by MMT, with age group as a categorical control.
mmtmodel1 <- lm(underproduction_mean ~ mmt + as.factor(new.age), data = data1)
summary(mmtmodel1)

# MMT explained by raw project age.
agemodel1 <- lm(mmt ~ age_of_project, data = data1)
summary(agemodel1)

# Underproduction explained by the formal score alone.
fsmodel2 <- lm(underproduction_mean ~ new.formal.score, data = data1)
summary(fsmodel2)
|  | 
 | ||||||
# Underproduction against MMT, with one linear fit over all projects.
g <- ggplot(data1, aes(x = mmt, y = underproduction_mean)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x) +
  labs(x = "MMT", y = "Underproduction Factor") +
  theme_bw()
g

# Same axes, but one linear fit per age group (points omitted); x and y are
# inherited from the plot-level mapping.
g3 <- ggplot(data1, aes(x = mmt, y = underproduction_mean)) +
  geom_smooth(aes(color = new.age.factor), method = "lm", formula = y ~ x) +
  labs(x = "MMT", y = "Underproduction Factor") +
  theme_bw()
g3

# Correlation between MMT and age-group code, then the MMT distribution per
# age group for the projects that have one.
cor.test(data1$mmt, data1$new.age)
age_data <- data1[!is.na(data1$new.age), ]
g2 <- ggplot(age_data, aes(x = factor(new.age), y = mmt)) +
  geom_boxplot()
g2
							
								
								
									
										
											BIN
										
									
								
								R/new_mmt_underprod_plot.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						| After Width: | Height: | Size: 47 KiB | 
| Before Width: | Height: | Size: 65 KiB After Width: | Height: | Size: 50 KiB | 
							
								
								
									
										9
									
								
								R/plotting_age.R
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @ -0,0 +1,9 @@ | |||||||
# Loads the per-dataset age-group shares and previews them.

# Start from an empty workspace with a fixed RNG seed.
rm(list = ls())
set.seed(424242)

library(readr)
library(ggplot2)

# One row per dataset; path is resolved against the working directory.
data1 <- read_csv("../age_percentages.csv", show_col_types = FALSE)

head(data1)
|  | 
 | ||||||
							
								
								
									
										
											BIN
										
									
								
								R/temp-mmt-colors.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						| After Width: | Height: | Size: 49 KiB | 
							
								
								
									
										
											BIN
										
									
								
								R/temp-temp.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						| After Width: | Height: | Size: 53 KiB | 
							
								
								
									
										5
									
								
								age_percentages.csv
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @ -0,0 +1,5 @@ | |||||||
|  | setname,age1,age2,age3,age4 | ||||||
|  | data1,0.39369,0.239271,0.2096806,0.1573584 | ||||||
|  | data2,0.5675676,0.1981982,0.1681682,0.06606607 | ||||||
|  | data3,0.2556818,0.2954545,0.2405303,0.2083333 | ||||||
|  | data4,0.5714286,0.1428571,0.1428571,0.2857143 | ||||||
| 
 | 
| @ -1,8 +0,0 @@ | |||||||
| {\rtf1\ansi\ansicpg1252\cocoartf2708 |  | ||||||
| \cocoatextscaling0\cocoaplatform0{\fonttbl\f0\fswiss\fcharset0 Helvetica;} |  | ||||||
| {\colortbl;\red255\green255\blue255;} |  | ||||||
| {\*\expandedcolortbl;;} |  | ||||||
| \margl1440\margr1440\vieww11520\viewh8400\viewkind0 |  | ||||||
| \pard\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx7920\tx8640\pardirnatural\partightenfactor0 |  | ||||||
| 
 |  | ||||||
| \f0\fs24 \cf0 <REDACTED_GITHUB_TOKEN>} |  | ||||||