diff --git a/R/.Rhistory b/R/.Rhistory index c890d26..adc20ad 100644 --- a/R/.Rhistory +++ b/R/.Rhistory @@ -1,204 +1,3 @@ -ggtitle("Posterior Predictive Density", subtitle="Non-Democracies") + -theme_bw() -p -#plot of reading_ease -p <- ggplot(readme_df, aes(x=flesch_reading_ease, group=as.factor(subdir))) + -geom_histogram(aes( color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -theme_bw() -p -p <- ggplot(contributing_df, aes(x=reading_time, group=as.factor(subdir))) + -geom_histogram(aes( color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -theme_bw() -p -p <- ggplot(contributing_df, aes(x=mcalpine_eflaw, group=as.factor(subdir))) + -geom_histogram(aes( color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -theme_bw() -p -p <- ggplot(contributing_df, aes(x=linsear_write, group=as.factor(subdir))) + -geom_histogram(aes( color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -theme_bw() -p -head(readme_df) -p0 -p0 <- ggplot(contributing_df, aes(x=linsear_write_formula, group=as.factor(subdir))) + -geom_histogram(aes( color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -theme_bw() -p0 -p0 <- ggplot(contributing_df, aes(x=linsear_write_formula, group=as.factor(subdir))) + -geom_histogram(aes( color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -xlim(-5, 30) + -theme_bw() -p0 -p0 <- ggplot(contributing_df, aes(x=linsear_write_formula, group=as.factor(subdir))) + -geom_histogram(aes( color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -xlim(-10, 30) + -theme_bw() -p0 -p0 <- ggplot(contributing_df, aes(x=linsear_write_formula, group=as.factor(subdir))) + -geom_histogram(aes( color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -xlim(-30, 30) + -theme_bw() -p0 -p0 <- ggplot(contributing_df, aes(x=flesch_reading_ease, group=as.factor(subdir))) + -geom_histogram(aes( color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -xlim(-30, 30) + -theme_bw() -p0 -p0 <- ggplot(contributing_df, aes(x=flesch_reading_ease, group=as.factor(subdir))) + -geom_histogram(aes( color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -xlim(-300, 300) + -theme_bw() -p0 -p0 <- ggplot(contributing_df, aes(x=linsear_write_formula, group=as.factor(subdir))) + -geom_histogram(aes( color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -xlim(-30, 30) + -theme_bw() -p0 -readme_linsear_plot <- ggplot(readme_df, aes(x=linsear_write_formula, group=as.factor(subdir))) + -geom_histogram(aes( color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -xlim(-30, 30) + -theme_bw() -p0 -readme_linsear_plot -readme_linsear_plot <- ggplot(readme_df, aes(x=linsear_write_formula, group=as.factor(subdir))) + -geom_histogram(aes( color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -scale_y_continuous(breaks = seq(0,10,1),labels = paste(seq(0, 10, by = 1), "%", sep = ""))+ -xlim(-30, 30) + -theme_bw() -readme_linsear_plot -readme_linsear_plot <- ggplot(readme_df, aes(x=linsear_write_formula, group=as.factor(subdir))) + -geom_histogram(aes( color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -xlim(-30, 30) + -theme_bw() -readme_linsear_plot -readme_linsear_plot <- ggplot(readme_df, aes(x=linsear_write_formula, group=as.factor(subdir))) + -geom_histogram(aes(y = (..count..)/sum(..count..)), color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -readme_linsear_plot <- ggplot(readme_df, aes(x=linsear_write_formula, group=as.factor(subdir))) + -geom_histogram(aes(y = (..count..)/sum(..count..), color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -xlim(-30, 30) + -theme_bw() -readme_linsear_plot -readme_linsear_plot <- ggplot(readme_df, aes(x=linsear_write_formula, group=as.factor(subdir))) + -geom_histogram(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -xlim(-30, 30) + -theme_bw() -readme_linsear_plot -y = (..count..)/sum(..count..), -y = (..count..)/sum(..count..), -readme_linsear_plot <- ggplot(readme_df, aes(x=linsear_write_formula, group=as.factor(subdir))) + -geom_histogram(aes(y = (..count..)/sum(..count..), color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -xlim(-30, 30) + -theme_bw() -readme_linsear_plot -readme_linsear_plot <- ggplot(readme_df, aes(x=linsear_write_formula, group=as.factor(subdir))) + -geom_histogram(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="dodge") + -xlim(-30, 30) + -theme_bw() -readme_linsear_plot -readme_linsear_plot <- ggplot(readme_df, aes(x=linsear_write_formula, group=as.factor(subdir))) + -geom_histogram(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -xlim(-30, 30) + -theme_bw() -readme_linsear_plot -contributing_linsear_plot <- ggplot(contributing_df, aes(x=linsear_write_formula, group=as.factor(subdir))) + -geom_histogram(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -xlim(-30, 30) + -theme_bw() -contributing_linsear_plot -contributing_linsear_plot <- ggplot(contributing_df, aes(x=linsear_write_formula, group=as.factor(subdir))) + -geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -xlim(-30, 30) + -theme_bw() -contributing_linsear_plot -readme_linsear_plot <- ggplot(readme_df, aes(x=linsear_write_formula, group=as.factor(subdir))) + -geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -xlim(-30, 30) + -theme_bw() -readme_linsear_plot -contributing_linsear_plot -contributing_reading_time_plot <- ggplot(contributing_df, aes(x=linsear_write_formula, group=as.factor(subdir))) + -geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -xlim(-30, 30) + -theme_bw() -contributing_reading_time_plot -contributing_reading_time_plot -contributing_reading_time_plot <- ggplot(contributing_df, aes(x=reading_time, group=as.factor(subdir))) + -geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -xlim(-30, 30) + -theme_bw() -contributing_reading_time_plot -contributing_reading_time_plot <- ggplot(contributing_df, aes(x=reading_time, group=as.factor(subdir))) + -geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -xlim(-10, 50) + -theme_bw() -contributing_reading_time_plot -contributing_reading_time_plot <- ggplot(contributing_df, aes(x=reading_time, group=as.factor(subdir))) + -geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -xlim(-10, 70) + -theme_bw() -contributing_reading_time_plot -contributing_reading_time_plot <- ggplot(contributing_df, aes(x=reading_time, group=as.factor(subdir))) + -geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -xlim(-10, 80) + -theme_bw() -contributing_reading_time_plot -contributing_reading_time_plot <- ggplot(contributing_df, aes(x=reading_time, group=as.factor(subdir))) + -geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -xlim(-10, 90) + -theme_bw() -contributing_reading_time_plot -contributing_mcalpine_eflaw <- ggplot(contributing_df, aes(x=mcalpine_eflaw, group=as.factor(subdir))) + -geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -xlim(-10, 90) + -theme_bw() -contributing_mcalpine_eflaw -contributing_mcalpine_eflaw <- ggplot(contributing_df, aes(x=mcalpine_eflaw, group=as.factor(subdir))) + -geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -xlim(-10, 70) + -theme_bw() -contributing_mcalpine_eflaw -contributing_reading_ease <- ggplot(contributing_df, aes(x=flesch_reading_ease, group=as.factor(subdir))) + -geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -xlim(-10, 70) + -theme_bw() -contributing_reading_ease -contributing_reading_ease <- ggplot(contributing_df, aes(x=flesch_reading_ease, group=as.factor(subdir))) + -geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -xlim(-10, 90) + -theme_bw() -contributing_reading_ease -grid.arrange(contributing_reading_ease, contributing_linsear_plot, contributing_mcalpine_eflaw, contributing_reading_time_plot, nrow = 2) -library(gridExtra) -grid.arrange(contributing_reading_ease, contributing_linsear_plot, contributing_mcalpine_eflaw, contributing_reading_time_plot, nrow = 2) -# plotting contributing linsear writing formula -contributing_linsear_plot <- ggplot(contributing_df, aes(x=linsear_write_formula, group=as.factor(subdir))) + -geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -xlim(-30, 30) + -theme_bw(legend.position="none") -# plotting contributing reading time -contributing_reading_time_plot <- ggplot(contributing_df, aes(x=reading_time, group=as.factor(subdir))) + -geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -xlim(-10, 90) + -theme_bw(legend.position="none") -# plotting contributing mcalpine eflaw -contributing_mcalpine_eflaw <- ggplot(contributing_df, aes(x=mcalpine_eflaw, group=as.factor(subdir))) + -geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -xlim(-10, 70) + -theme_bw(legend.position="none") -# plotting contributing reading ease -contributing_reading_ease <- ggplot(contributing_df, aes(x=flesch_reading_ease, group=as.factor(subdir))) + -geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -theme(legend.position = "top") + -xlim(-10, 90) + -theme_bw() -contributing_reading_ease -grid.arrange(contributing_reading_ease, contributing_linsear_plot, contributing_mcalpine_eflaw, contributing_reading_time_plot, nrow = 2) -# plotting contributing mcalpine eflaw -contributing_mcalpine_eflaw <- ggplot(contributing_df, aes(x=mcalpine_eflaw, group=as.factor(subdir))) + -geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + -xlim(-10, 70) + -theme(legend.position="none")+ -theme_bw() # plotting contributing reading ease contributing_reading_ease <- ggplot(contributing_df, aes(x=flesch_reading_ease, group=as.factor(subdir))) + geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + @@ -510,3 +309,204 @@ theme_bw() + theme(legend.position = "top") contributing_reading_ease grid.arrange(contributing_reading_ease, contributing_reading_time_plot,readme_reading_ease, readme_reading_time_plot, nrow = 2) +library(tidyverse) +library(plyr) +library(gridExtra) +library(ggpubr) +# script for the analysis of document readability metrics +# readability metrics will be studied controlled by their length +# gaughan@u.northwestern.edu +# loading in the data +try(setwd(dirname(rstudioapi::getActiveDocumentContext()$path))) +readme_df <- read_csv("../text_analysis/draft_readability_readme.csv") +contributing_df <- read_csv("../text_analysis/draft_readability_contributing.csv") +#getting basic stats for the readme readability +median(readme_df$flesch_reading_ease) +median(readme_df$linsear_write_formula) +median(contributing_df$reading_time) +median(contributing_df$linsear_write_formula) +#plotting readme reading ease +readme_reading_ease <- ggplot(readme_df, aes(x=flesch_reading_ease, group=as.factor(subdir))) + +geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + +scale_color_manual(values = subdirColors) + +xlim(-10, 90) + +ylab("readme density") + +guides(fill="none", color="none")+ +theme_bw() +readme_reading_ease +#plotting readme reading time +readme_reading_time_plot <- ggplot(readme_df, aes(x=reading_time, group=as.factor(subdir))) + +geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + +scale_color_manual(values = subdirColors) + +xlim(-10, 90) + +ylab("readme density") + +guides(fill="none", color="none")+ +theme_bw() +# establishing the color scheme +subdirColors <- +setNames( c('firebrick1', 'forestgreen', 'cornflowerblue') +, levels(contributing_df$subdir) ) +#plotting readme reading ease +readme_reading_ease <- ggplot(readme_df, aes(x=flesch_reading_ease, group=as.factor(subdir))) + +geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + +scale_color_manual(values = subdirColors) + +xlim(-10, 90) + +ylab("readme density") + +guides(fill="none", color="none")+ +theme_bw() +#plotting readme reading time +readme_reading_time_plot <- ggplot(readme_df, aes(x=reading_time, group=as.factor(subdir))) + +geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + +scale_color_manual(values = subdirColors) + +xlim(-10, 90) + +ylab("readme density") + +guides(fill="none", color="none")+ +theme_bw() +# plotting contributing reading time +contributing_reading_time_plot <- ggplot(contributing_df, aes(x=reading_time, group=as.factor(subdir))) + +geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + +xlim(-10, 90) + +ylab("contributing density") + +guides(fill="none", color="none")+ +theme_bw() +# plotting contributing reading ease +contributing_reading_ease <- ggplot(contributing_df, aes(x=flesch_reading_ease, group=as.factor(subdir))) + +geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + +scale_color_manual(values = subdirColors) + +ylab("contributing density") + +xlim(-10, 90) + +theme_bw() + +theme(legend.position = "top") +contributing_reading_ease +grid.arrange(contributing_reading_ease, contributing_reading_time_plot,readme_reading_ease, readme_reading_time_plot, nrow = 2) +# plotting contributing linsear writing formula +contributing_linsear_plot <- ggplot(contributing_df, aes(x=linsear_write_formula, group=as.factor(subdir))) + +geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + +xlim(-30, 30) + +guides(fill="none", color="none")+ +theme_bw() +# plotting contributing reading time +contributing_reading_time_plot <- ggplot(contributing_df, aes(x=reading_time, group=as.factor(subdir))) + +geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + +xlim(-10, 90) + +ylab("contributing density") + +guides(fill="none", color="none")+ +theme_bw() +# plotting contributing mcalpine eflaw +contributing_mcalpine_eflaw <- ggplot(contributing_df, aes(x=mcalpine_eflaw, group=as.factor(subdir))) + +geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + +xlim(-10, 70) + +guides(fill="none", color="none")+ +theme_bw() +grid.arrange(contributing_reading_ease, contributing_reading_time_plot,contributing_linsear_plot, contributing_mcalpine_eflaw readme_reading_ease, readme_reading_time_plot, nrow = 2) +grid.arrange(contributing_reading_ease, contributing_reading_time_plot,contributing_linsear_plot, contributing_mcalpine_eflaw, readme_reading_ease, readme_reading_time_plot, nrow = 2) +readme_linsear_plot <- ggplot(readme_df, aes(x=linsear_write_formula, group=as.factor(subdir))) + +geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + +scale_color_manual(values = subdirColors) + +xlim(-10, 30) + +ylab("readme density") + +guides(fill="none", color="none")+ +theme_bw() +# plotting contributing linsear writing formula +contributing_linsear_plot <- ggplot(contributing_df, aes(x=linsear_write_formula, group=as.factor(subdir))) + +geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + +xlim(-10, 30) + +guides(fill="none", color="none")+ +theme_bw() +readme_mcalpine_eflaw <- ggplot(readme_df, aes(x=mcalpine_eflaw, group=as.factor(subdir))) + +geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + +xlim(-10, 60) + +guides(fill="none", color="none")+ +theme_bw() +grid.arrange(contributing_reading_ease, contributing_reading_time_plot,contributing_linsear_plot, contributing_mcalpine_eflaw, readme_reading_ease, readme_reading_time_plot, readme_linsear_plot, readme_mcalpine_eflaw, nrow = 2) +# plotting contributing mcalpine eflaw +contributing_mcalpine_eflaw <- ggplot(contributing_df, aes(x=mcalpine_eflaw, group=as.factor(subdir))) + +scale_color_manual(values = subdirColors) + +geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + +xlim(-10, 60) + +guides(fill="none", color="none")+ +theme_bw() +#plotting readme reading ease +readme_reading_ease <- ggplot(readme_df, aes(x=flesch_reading_ease, group=as.factor(subdir))) + +geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + +scale_color_manual(values = subdirColors) + +xlim(-5, 90) + +ylab("readme density") + +guides(fill="none", color="none")+ +theme_bw() +#plotting readme reading time +readme_reading_time_plot <- ggplot(readme_df, aes(x=reading_time, group=as.factor(subdir))) + +geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + +scale_color_manual(values = subdirColors) + +xlim(-5, 90) + +ylab("readme density") + +guides(fill="none", color="none")+ +theme_bw() +readme_linsear_plot <- ggplot(readme_df, aes(x=linsear_write_formula, group=as.factor(subdir))) + +geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + +scale_color_manual(values = subdirColors) + +xlim(-5, 30) + +ylab("readme density") + +guides(fill="none", color="none")+ +theme_bw() +readme_mcalpine_eflaw <- ggplot(readme_df, aes(x=mcalpine_eflaw, group=as.factor(subdir))) + +scale_color_manual(values = subdirColors) + +geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + +xlim(-5, 60) + +guides(fill="none", color="none")+ +theme_bw() +# plotting contributing linsear writing formula +contributing_linsear_plot <- ggplot(contributing_df, aes(x=linsear_write_formula, group=as.factor(subdir))) + +scale_color_manual(values = subdirColors) + +geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + +xlim(-5, 30) + +guides(fill="none", color="none")+ +theme_bw() +# plotting contributing reading time +contributing_reading_time_plot <- ggplot(contributing_df, aes(x=reading_time, group=as.factor(subdir))) + +scale_color_manual(values = subdirColors) + +geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + +xlim(-5, 90) + +ylab("contributing density") + +guides(fill="none", color="none")+ +theme_bw() +# plotting contributing mcalpine eflaw +contributing_mcalpine_eflaw <- ggplot(contributing_df, aes(x=mcalpine_eflaw, group=as.factor(subdir))) + +scale_color_manual(values = subdirColors) + +geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + +xlim(-5, 60) + +guides(fill="none", color="none")+ +theme_bw() +# plotting contributing reading ease +contributing_reading_ease <- ggplot(contributing_df, aes(x=flesch_reading_ease, group=as.factor(subdir))) + +geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + +scale_color_manual(values = subdirColors) + +ylab("contributing density") + +xlim(-5, 90) + +theme_bw() + +theme(legend.position = "top") +grid.arrange(contributing_reading_ease, contributing_reading_time_plot,contributing_linsear_plot, contributing_mcalpine_eflaw, readme_reading_ease, readme_reading_time_plot, readme_linsear_plot, readme_mcalpine_eflaw, nrow = 2) +library(tidyverse) +library(plyr) +library(stringr) +try(setwd(dirname(rstudioapi::getActiveDocumentContext()$path))) +#load in data +contrib_df <- read_csv("../final_data/deb_contrib_pop_change.csv") +View(contrib_df) +expanded_contrib_data <- expand_timeseries(contrib_df[1,]) +for (i in 2:nrow(contrib_df)){ +expanded_contrib_data <- rbind(expanded_contrib_data, expand_timeseries(contrib_df[i,])) +} +#some expansion needs to happens for each project +expand_timeseries <- function(project_row) { +longer <- project_row |> +pivot_longer(cols = ends_with("new"), +names_to = "window", +values_to = "count") |> +unnest(count) |> +mutate(after_doc = as.numeric(str_detect(window, "after"))) |> +mutate(is_collab = as.numeric(str_detect(window, "collab"))) +return(longer) +} +expanded_contrib_data <- expand_timeseries(contrib_df[1,]) +View(expanded_contrib_data) diff --git a/R/documentReadabilityAnalysis.R b/R/documentReadabilityAnalysis.R index 373c0e6..bee6c23 100644 --- a/R/documentReadabilityAnalysis.R +++ b/R/documentReadabilityAnalysis.R @@ -30,7 +30,7 @@ readme_linsear_plot <- ggplot(readme_df, aes(x=linsear_write_formula, group=as.f readme_reading_ease <- ggplot(readme_df, aes(x=flesch_reading_ease, group=as.factor(subdir))) + geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + scale_color_manual(values = subdirColors) + - xlim(-10, 90) + + xlim(-5, 90) + ylab("readme density") + guides(fill="none", color="none")+ theme_bw() @@ -39,10 +39,24 @@ readme_reading_ease readme_reading_time_plot <- ggplot(readme_df, aes(x=reading_time, group=as.factor(subdir))) + geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + scale_color_manual(values = subdirColors) + - xlim(-10, 90) + + xlim(-5, 90) + ylab("readme density") + guides(fill="none", color="none")+ theme_bw() + +readme_linsear_plot <- ggplot(readme_df, aes(x=linsear_write_formula, group=as.factor(subdir))) + + geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + + scale_color_manual(values = subdirColors) + + xlim(-5, 30) + + ylab("readme density") + + guides(fill="none", color="none")+ + theme_bw() +readme_mcalpine_eflaw <- ggplot(readme_df, aes(x=mcalpine_eflaw, group=as.factor(subdir))) + + scale_color_manual(values = subdirColors) + + geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + + xlim(-5, 60) + + guides(fill="none", color="none")+ + theme_bw() #theme(axis.title.y=element_blank()) #plot of reading_ease #readme_df <- readme_df |> @@ -57,21 +71,24 @@ median(contributing_df$reading_time) median(contributing_df$linsear_write_formula) # plotting contributing linsear writing formula contributing_linsear_plot <- ggplot(contributing_df, aes(x=linsear_write_formula, group=as.factor(subdir))) + + scale_color_manual(values = subdirColors) + geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + - xlim(-30, 30) + + xlim(-5, 30) + guides(fill="none", color="none")+ theme_bw() # plotting contributing reading time contributing_reading_time_plot <- ggplot(contributing_df, aes(x=reading_time, group=as.factor(subdir))) + + scale_color_manual(values = subdirColors) + geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + - xlim(-10, 90) + + xlim(-5, 90) + ylab("contributing density") + guides(fill="none", color="none")+ theme_bw() # plotting contributing mcalpine eflaw contributing_mcalpine_eflaw <- ggplot(contributing_df, aes(x=mcalpine_eflaw, group=as.factor(subdir))) + + scale_color_manual(values = subdirColors) + geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + - xlim(-10, 70) + + xlim(-5, 60) + guides(fill="none", color="none")+ theme_bw() # plotting contributing reading ease @@ -79,8 +96,8 @@ contributing_reading_ease <- ggplot(contributing_df, aes(x=flesch_reading_ease, geom_density(aes(color = as.factor(subdir), fill=as.factor(subdir)), alpha=0.2, position="identity") + scale_color_manual(values = subdirColors) + ylab("contributing density") + - xlim(-10, 90) + + xlim(-5, 90) + theme_bw() + theme(legend.position = "top") contributing_reading_ease -grid.arrange(contributing_reading_ease, contributing_reading_time_plot,readme_reading_ease, readme_reading_time_plot, nrow = 2) +grid.arrange(contributing_reading_ease, contributing_reading_time_plot,contributing_linsear_plot, contributing_mcalpine_eflaw, readme_reading_ease, readme_reading_time_plot, readme_linsear_plot, readme_mcalpine_eflaw, nrow = 2) diff --git a/R/draft_readability_analysis_plot.png b/R/draft_readability_analysis_plot.png index f900f25..8f94969 100644 Binary files a/R/draft_readability_analysis_plot.png and b/R/draft_readability_analysis_plot.png differ diff --git a/R/final_models/.DS_Store b/R/final_models/.DS_Store new file mode 100644 index 0000000..5008ddf Binary files /dev/null and b/R/final_models/.DS_Store differ diff --git a/R/final_models/0510_pop_contrib_collab.rda b/R/final_models/0510_pop_contrib_collab.rda deleted file mode 100644 index e980ed3..0000000 Binary files a/R/final_models/0510_pop_contrib_collab.rda and /dev/null differ diff --git a/R/final_models/0510_pop_contrib_contrib.rda b/R/final_models/0510_pop_contrib_contrib.rda deleted file mode 100644 index dddb034..0000000 Binary files a/R/final_models/0510_pop_contrib_contrib.rda and /dev/null differ diff --git a/R/final_models/0510_pop_rm_collab.rda b/R/final_models/0510_pop_rm_collab.rda deleted file mode 100644 index 1bb0da8..0000000 Binary files a/R/final_models/0510_pop_rm_collab.rda and /dev/null differ diff --git a/R/final_models/0510_pop_rm_contrib.rda b/R/final_models/0510_pop_rm_contrib.rda deleted file mode 100644 index 8fdd753..0000000 Binary files a/R/final_models/0510_pop_rm_contrib.rda and /dev/null differ diff --git a/R/final_models/0510_rm_all.rda b/R/final_models/0510_rm_all.rda deleted file mode 100644 index 884169b..0000000 Binary files a/R/final_models/0510_rm_all.rda and /dev/null differ diff --git a/R/final_models/0512_contrib_all.rda b/R/final_models/0512_contrib_all.rda deleted file mode 100644 index efcee32..0000000 Binary files a/R/final_models/0512_contrib_all.rda and /dev/null differ diff --git a/R/popRDDAnalyssis.R b/R/popRDDAnalyssis.R index 2dd3e74..399f42c 100644 --- a/R/popRDDAnalyssis.R +++ b/R/popRDDAnalyssis.R @@ -36,28 +36,39 @@ contrib_pop_contrib <- expanded_contrib_data[which(expanded_contrib_data$is_coll #import models library(lme4) library(optimx) -collab_readme_model <- glmer.nb(log1pcount ~ after_doc + (after_doc| upstream_vcs_link), data=collab_pop_readme) +library(MASS) +simple_collab_readme_model <- glm.nb(count ~ as.factor(after_doc), data=collab_pop_readme) +summary(simple_collab_readme_model) +qqnorm(residuals(simple_collab_readme_model)) +# +cor.test(collab_pop_readme$count, collab_pop_readme$after_doc) +# I don't think MLM is the right one +collab_readme_model <- glmer.nb(log1pcount ~ as.factor(after_doc) + (after_doc| upstream_vcs_link), data=collab_pop_readme) summary(collab_readme_model) -saveRDS(collab_readme_model, "0510_pop_rm_collab.rda") +saveRDS(collab_readme_model, "final_models/0623_pop_rm_collab.rda") crm_residuals <- residuals(collab_readme_model) qqnorm(crm_residuals) contrib_readme_model <- glmer.nb(log1pcount ~ after_doc + (after_doc| upstream_vcs_link), data=contrib_pop_readme) summary(contrib_readme_model) -saveRDS(contrib_readme_model, "0510_pop_rm_contrib.rda") +saveRDS(contrib_readme_model, "final_models/0623_pop_rm_contrib.rda") +#contrib_readme_model <- load("final_models/0510_pop_rm_contrib.rda") conrm_residuals <- residuals(contrib_readme_model) qqnorm(conrm_residuals) collab_contrib_model <- glmer.nb(log1pcount ~ after_doc + (after_doc| upstream_vcs_link), data=collab_pop_contrib) summary(collab_contrib_model) -saveRDS(collab_contrib_model, "0510_pop_contrib_collab.rda") +saveRDS(collab_contrib_model, "final_models/0623_pop_contrib_collab.rda") contrib_contrib_model <- glmer.nb(log1pcount ~ after_doc + (after_doc| upstream_vcs_link), data=contrib_pop_contrib) summary(contrib_contrib_model) -saveRDS(contrib_contrib_model, "0510_pop_contrib_contrib.rda") +saveRDS(contrib_contrib_model, "final_models/0623_pop_contrib_contrib.rda") library(ggplot2) +contrib_pop_readme |> + ggplot(aes(x = after_doc, y = log1pcount)) + + expanded_readme_data |> - ggplot(aes(x = after_doc, y = log1pcount, col = as.factor(is_collab))) + - geom_point() + geom_jitter() + ggplot(aes(x = after_doc, y = log1pcount, col = as.factor(after_doc))) + + geom_violin() expanded_contrib_data |> - ggplot(aes(x = after_doc, y = count, col = as.factor(is_collab))) + - geom_point() + geom_jitter() + ggplot(aes(x = after_doc, y = count, col = as.factor(after_doc))) + + geom_violin()