2025-02-04 04:50:34 +00:00
|
|
|
library(tidyverse)
|
2025-02-08 00:59:56 +00:00
|
|
|
# Load the merged manifest tables, one per document type. Each is later
# joined to the matching readability table on its `new_filepath` column.
readme_groupings <- read.csv(
  file = "text_analysis/0207_readme_merged_manifest.csv"
)
contrib_groupings <- read.csv(
  file = "text_analysis/0207_contributing_merged_manifest.csv"
)
|
2025-02-04 04:50:34 +00:00
|
|
|
# Expose each manifest's `fvf_filepath` column under the additional name
# `filename`; the original column is left in place.
readme_groupings[["filename"]] <- readme_groupings$fvf_filepath
contrib_groupings[["filename"]] <- contrib_groupings$fvf_filepath
|
2025-02-08 00:59:56 +00:00
|
|
|
# Load the per-document readability tables. Columns used further down in
# this script: `filename`, `word_count`, `reading_time`,
# `flesch_reading_ease`.
readme_textstat <- read.csv(
  file = "text_analysis/020725_README_readability.csv"
)
contributing_textstat <- read.csv(
  file = "text_analysis/020725_CONTRIBUTING_readability.csv"
)
|
2025-02-04 04:50:34 +00:00
|
|
|
|
|
|
|
|
|
|
|
# Fill colours keyed by document type; used as the manual fill scale of the
# word-count density plot.
doctypeColors <- c(
  README = "#c7756a",
  CONTRIBUTING = "#5da2d8"
)
|
2025-02-04 04:50:34 +00:00
|
|
|
# Tag every row with its document type, then stack both readability tables
# into one data frame for the combined word-count plot.
readme_textstat$type <- "README"
contributing_textstat$type <- "CONTRIBUTING"

# rbind() requires both tables to have the same set of column names.
all_df <- rbind(contributing_textstat, readme_textstat)

# Fix the level order explicitly so CONTRIBUTING is the first level.
all_df$type <- factor(all_df$type, levels = c("CONTRIBUTING", "README"))
|
|
|
|
|
2025-02-04 04:50:34 +00:00
|
|
|
# Word-count density for both document types, drawn as overlaid
# semi-transparent filled curves (no outline) on a shared axis.
length_plot_all <- ggplot(
  data = all_df,
  mapping = aes(x = word_count, group = as.factor(type))
) +
  geom_density(
    mapping = aes(fill = as.factor(type)),
    color = NA,
    alpha = 0.6,
    position = "identity"
  ) +
  scale_fill_manual(values = doctypeColors) +
  # Scale limits (not a zoom): rows with word_count outside this range are
  # removed by ggplot2 before the density is estimated.
  scale_x_continuous(limits = c(-10, 600)) +
  labs(
    fill = "Document Type",
    x = "Word Count",
    y = "Density Across Documents"
  ) +
  theme_bw() +
  theme(legend.position = "top")

# Render the figure.
length_plot_all
|
|
|
|
|
2025-02-08 00:59:56 +00:00
|
|
|
#ggsave(filename = "plots/cr-0207-wc-density.png", plot = length_plot_all, width = 9, height = 9, dpi = 800)
|
2025-02-04 04:50:34 +00:00
|
|
|
|
2025-02-08 00:59:56 +00:00
|
|
|
# Attach manifest columns (including `ranef_grouping`, used by the plots
# below) to each readability row. Rows are matched where the readability
# table's `filename` equals the manifest's `new_filepath`; unmatched rows
# on either side are dropped by the inner join.
contributing_df <- contributing_textstat %>%
  inner_join(contrib_groupings, by = c(filename = "new_filepath"))

readme_df <- readme_textstat %>%
  inner_join(readme_groupings, by = c(filename = "new_filepath"))
|
2025-02-04 04:50:34 +00:00
|
|
|
|
|
|
|
# Fill colours for the grouping plots, keyed by the character names
# "0", "1" and "2".
subdirColors <- c(
  "0" = "#31449c",
  "1" = "#4a7c85",
  "2" = "#c5db68"
)
|
|
|
|
|
|
|
|
# CONTRIBUTING files: reading-time densities per `ranef_grouping`, stacked
# and rescaled to a constant total height (position = "fill"). This is the
# only panel that keeps its legend, which is drawn inside the plotting area.
contributing_reading_time_plot <- ggplot(
  data = contributing_df,
  mapping = aes(x = reading_time, group = as.factor(ranef_grouping))
) +
  geom_density(
    mapping = aes(fill = as.factor(ranef_grouping)),
    position = "fill"
  ) +
  # Labels are positional: they are paired with the scale's breaks in order.
  scale_fill_manual(
    values = subdirColors,
    labels = c("CI < 0", "0 in CI", "0 < CI")
  ) +
  scale_x_continuous(limits = c(-5, 90)) +
  labs(x = NULL, y = NULL, fill = "RE Grouping") +
  theme_bw() +
  theme(
    legend.position = "inside",
    legend.position.inside = c(.89, .92),
    legend.justification = c("right", "top"),
    legend.direction = "horizontal",
    legend.margin = margin(6, 6, 6, 6)
  )

# Render the figure.
contributing_reading_time_plot
|
|
|
|
|
|
|
|
# CONTRIBUTING files: Flesch reading-ease densities per `ranef_grouping`,
# stacked and rescaled to a constant total height. The legend is hidden.
contributing_reading_ease <- ggplot(
  data = contributing_df,
  mapping = aes(x = flesch_reading_ease, group = as.factor(ranef_grouping))
) +
  geom_density(
    mapping = aes(fill = as.factor(ranef_grouping)),
    position = "fill"
  ) +
  scale_fill_manual(
    values = subdirColors,
    labels = c("CI < 0", "0 in CI", "0 < CI")
  ) +
  # NOTE(review): same x-limits as the reading-time panels; confirm this
  # range is intended for Flesch scores, since rows outside it are dropped.
  scale_x_continuous(limits = c(-5, 90)) +
  labs(x = NULL, y = "CONTRIBUTING Density", fill = "RE Grouping") +
  theme_bw() +
  guides(fill = "none", color = "none")
|
|
|
|
#contributing_reading_ease
|
|
|
|
|
|
|
|
# README files: reading-time densities per `ranef_grouping`, stacked and
# rescaled to a constant total height. The legend is hidden.
readme_reading_time_plot <- ggplot(
  data = readme_df,
  mapping = aes(x = reading_time, group = as.factor(ranef_grouping))
) +
  geom_density(
    mapping = aes(fill = as.factor(ranef_grouping)),
    position = "fill"
  ) +
  scale_fill_manual(values = subdirColors) +
  scale_x_continuous(limits = c(-5, 90)) +
  labs(x = "Reading Time (s)", y = NULL) +
  guides(fill = "none", color = "none") +
  theme_bw()

# Render the figure.
readme_reading_time_plot
|
2025-02-04 04:50:34 +00:00
|
|
|
|
|
|
|
# README files: Flesch reading-ease densities per `ranef_grouping`, stacked
# and rescaled to a constant total height. The legend is hidden.
readme_reading_ease <- ggplot(
  data = readme_df,
  mapping = aes(x = flesch_reading_ease, group = as.factor(ranef_grouping))
) +
  geom_density(
    mapping = aes(fill = as.factor(ranef_grouping)),
    position = "fill"
  ) +
  scale_fill_manual(values = subdirColors) +
  # NOTE(review): same x-limits as the reading-time panels; confirm this
  # range is intended for Flesch scores, since rows outside it are dropped.
  scale_x_continuous(limits = c(-5, 90)) +
  labs(x = "Flesch Reading Ease", y = "README Density") +
  guides(fill = "none", color = "none") +
  theme_bw()

# Render the figure.
readme_reading_ease
|
2025-02-04 04:50:34 +00:00
|
|
|
library(gridExtra)
|
|
|
|
# Draw the four panels on a 2 x 2 grid, filled row by row:
# top row = CONTRIBUTING (reading ease, reading time),
# bottom row = README (reading ease, reading time).
grid.arrange(
  contributing_reading_ease,
  contributing_reading_time_plot,
  readme_reading_ease,
  readme_reading_time_plot,
  nrow = 2
)
|
|
|
|
|