# script for the analysis of document readability metrics
# readability metrics will be studied controlled by their length
# gaughan@u.northwestern.edu

# plyr is attached BEFORE the tidyverse on purpose: attached the other way
# round, plyr masks dplyr verbs (mutate, summarise, arrange, ...).
library(plyr)
library(tidyverse)
library(gridExtra)
library(ggpubr)

# loading in the data ----
# Best effort: point the working directory at this script's folder so the
# relative paths below resolve. If the rstudioapi call errors, try() reports
# the error and the script carries on with the current working directory.
try(setwd(dirname(rstudioapi::getActiveDocumentContext()$path)))
readme_df <- read_csv("../text_analysis/draft_readability_readme.csv")
contributing_df <- read_csv(
  "../text_analysis/draft_readability_contributing.csv"
)
head(readme_df)

# establishing the color scheme ----
# read_csv() returns `subdir` as character, so levels() on the raw column is
# NULL and the palette would be left unnamed. Derive the level names
# explicitly, from BOTH data sets, so every subdir keeps the same colour in
# every plot regardless of which groups a given data set contains.
subdir_palette <- c("red", "forestgreen", "blue")
subdir_levels <- levels(as.factor(c(
  as.character(readme_df$subdir),
  as.character(contributing_df$subdir)
)))
if (length(subdir_levels) > length(subdir_palette)) {
  stop(
    "Found ", length(subdir_levels), " subdir groups but only ",
    length(subdir_palette), " colours are defined.",
    call. = FALSE
  )
}
subdirColors <- setNames(
  subdir_palette[seq_along(subdir_levels)],
  subdir_levels
)

# Density plot of one readability metric, one curve per subdir.
#
# df              data frame with a `subdir` column and the column `metric`
# metric          name (string) of the column plotted on the x axis
# x_limits        numeric vector c(lower, upper) passed to xlim()
# y_label         y axis title
# legend_position value for theme(legend.position); "none" hides the legend
# colors          named colour vector used for both outline and fill
#
# Returns the ggplot object (nothing is drawn until it is printed).
plot_subdir_density <- function(df, metric, x_limits, y_label = "density",
                                legend_position = "none",
                                colors = subdirColors) {
  ggplot(df, aes(x = .data[[metric]], group = as.factor(subdir))) +
    geom_density(
      aes(color = as.factor(subdir), fill = as.factor(subdir)),
      alpha = 0.2,
      position = "identity"
    ) +
    # same palette for outline and fill, applied to every plot
    scale_color_manual(values = colors) +
    scale_fill_manual(values = colors) +
    xlim(x_limits[1], x_limits[2]) +
    # x title is set explicitly so it stays the bare column name
    labs(x = metric, y = y_label) +
    theme_bw() +
    # must come after theme_bw(), which resets legend.position
    theme(legend.position = legend_position)
}

# README documents ----
aggregate(readme_df[, 3:10], list(readme_df$subdir), median)
#getting basic stats for the readme readability
median(readme_df$flesch_reading_ease)
median(readme_df$mcalpine_eflaw)
median(readme_df$reading_time)

#plotting linsear scoring
readme_linsear_plot <- plot_subdir_density(
  readme_df, "linsear_write_formula", c(-30, 30),
  legend_position = "right"
)

#plotting readme reading ease
readme_reading_ease <- plot_subdir_density(
  readme_df, "flesch_reading_ease", c(-10, 90),
  y_label = "readme density"
)
readme_reading_ease

#plotting readme reading time
readme_reading_time_plot <- plot_subdir_density(
  readme_df, "reading_time", c(-10, 90),
  y_label = "readme density"
)
  #theme(axis.title.y=element_blank())
#plot of reading_ease
#readme_df <- readme_df |>
#  mutate(coef_grouping <- as.factor(subdir))
#test_lm <- lm(mcalpine_eflaw ~ word_count + as.factor(subdir),data=readme_df)
#summary(test_lm)

# CONTRIBUTING documents ----
aggregate(contributing_df[, 3:10], list(contributing_df$subdir), median)
#basic stats for the contributing readability
median(contributing_df$flesch_reading_ease)
median(contributing_df$mcalpine_eflaw)
median(contributing_df$reading_time)

# plotting contributing linsear writing formula
contributing_linsear_plot <- plot_subdir_density(
  contributing_df, "linsear_write_formula", c(-30, 30)
)

# plotting contributing reading time
contributing_reading_time_plot <- plot_subdir_density(
  contributing_df, "reading_time", c(-10, 90),
  y_label = "contributing density"
)

# plotting contributing mcalpine eflaw
contributing_mcalpine_eflaw <- plot_subdir_density(
  contributing_df, "mcalpine_eflaw", c(-10, 70)
)

# plotting contributing reading ease
# (the only panel of the combined figure that carries the legend)
contributing_reading_ease <- plot_subdir_density(
  contributing_df, "flesch_reading_ease", c(-10, 90),
  y_label = "contributing density",
  legend_position = "top"
)
contributing_reading_ease

# combined 2 x 2 figure: contributing on the top row, readme on the bottom
grid.arrange(
  contributing_reading_ease, contributing_reading_time_plot,
  readme_reading_ease, readme_reading_time_plot,
  nrow = 2
)