2024-08-24 22:04:46 +00:00
|
|
|
, c(0,1,2) )
|
|
|
|
# Plot each row of `contrib_groupings` as a confidence-interval range
# (conf.low to conf.high) at its rank, coloured by `ranef_grouping`.
# `contrib_groupings` and `subdirColors` must already exist in the session.
contrib_g <- contrib_groupings |>
  ggplot(aes(x = rank, y = estimate, col = as.factor(ranef_grouping))) +
  geom_linerange(aes(ymin = conf.low, ymax = conf.high)) +
  scale_color_manual(
    values = subdirColors,
    labels = c("CI < 0", "0 in CI", "0 < CI")
  ) +
  theme_bw() +
  theme(legend.position = "top") +
  labs(
    x = "RE Coefficient Rank",
    y = "RE Coefficient Estimate",
    color = "Estimate Grouping"
  )
# Auto-print to render the plot.
contrib_g
|
|
|
|
# Palette keyed by grouping code; the numeric names are coerced to the
# strings "0", "1", "2".
subdirColors <- setNames(
  c("#942e55", "#78c58a", "#9b6e29"),
  c(0, 1, 2)
)
|
|
|
|
# Plot each row of `contrib_groupings` as a confidence-interval range
# (conf.low to conf.high) at its rank, coloured by `ranef_grouping`.
# `contrib_groupings` and `subdirColors` must already exist in the session.
contrib_g <- contrib_groupings |>
  ggplot(aes(x = rank, y = estimate, col = as.factor(ranef_grouping))) +
  geom_linerange(aes(ymin = conf.low, ymax = conf.high)) +
  scale_color_manual(
    values = subdirColors,
    labels = c("CI < 0", "0 in CI", "0 < CI")
  ) +
  theme_bw() +
  theme(legend.position = "top") +
  labs(
    x = "RE Coefficient Rank",
    y = "RE Coefficient Estimate",
    color = "Estimate Grouping"
  )
# Auto-print to render the plot.
contrib_g
|
|
|
|
# Palette keyed by grouping code; the numeric names are coerced to the
# strings "0", "1", "2".
subdirColors <- setNames(
  c("#a1a596", "#557784", "#2f6382"),
  c(0, 1, 2)
)
|
|
|
|
# Plot each row of `contrib_groupings` as a confidence-interval range
# (conf.low to conf.high) at its rank, coloured by `ranef_grouping`.
# `contrib_groupings` and `subdirColors` must already exist in the session.
contrib_g <- contrib_groupings |>
  ggplot(aes(x = rank, y = estimate, col = as.factor(ranef_grouping))) +
  geom_linerange(aes(ymin = conf.low, ymax = conf.high)) +
  scale_color_manual(
    values = subdirColors,
    labels = c("CI < 0", "0 in CI", "0 < CI")
  ) +
  theme_bw() +
  theme(legend.position = "top") +
  labs(
    x = "RE Coefficient Rank",
    y = "RE Coefficient Estimate",
    color = "Estimate Grouping"
  )
# Auto-print to render the plot.
contrib_g
|
|
|
|
# Palette keyed by grouping code; the numeric names are coerced to the
# strings "0", "1", "2".
subdirColors <- setNames(
  c("#a3b0c9", "#101f31", "#28578d"),
  c(0, 1, 2)
)
|
|
|
|
# Ranked confidence-interval plot for `readme_groupings`; the colour legend
# is suppressed via guides().
# `readme_groupings` and `subdirColors` must already exist in the session.
readme_g <- readme_groupings |>
  ggplot(aes(x = rank, y = estimate, col = as.factor(ranef_grouping))) +
  geom_linerange(aes(ymin = conf.low, ymax = conf.high)) +
  scale_color_manual(values = subdirColors) +
  guides(fill = "none", color = "none") +
  theme_bw() +
  labs(
    x = "RE Coefficient Rank",
    y = "RE Coefficient Estimate",
    color = "Estimate Grouping"
  )
readme_g

# Same plot for `contrib_groupings`, this time with a labelled legend on top.
contrib_g <- contrib_groupings |>
  ggplot(aes(x = rank, y = estimate, col = as.factor(ranef_grouping))) +
  geom_linerange(aes(ymin = conf.low, ymax = conf.high)) +
  scale_color_manual(
    values = subdirColors,
    labels = c("CI < 0", "0 in CI", "0 < CI")
  ) +
  theme_bw() +
  theme(legend.position = "top") +
  labs(
    x = "RE Coefficient Rank",
    y = "RE Coefficient Estimate",
    color = "Estimate Grouping"
  )
contrib_g
|
|
|
|
# Palette keyed by grouping code; the numeric names are coerced to the
# strings "0", "1", "2".
subdirColors <- setNames(
  c("#f8f06b", "#ca7780", "#a13795"),
  c(0, 1, 2)
)
|
|
|
|
# Plot each row of `contrib_groupings` as a confidence-interval range
# (conf.low to conf.high) at its rank, coloured by `ranef_grouping`.
# `contrib_groupings` and `subdirColors` must already exist in the session.
contrib_g <- contrib_groupings |>
  ggplot(aes(x = rank, y = estimate, col = as.factor(ranef_grouping))) +
  geom_linerange(aes(ymin = conf.low, ymax = conf.high)) +
  scale_color_manual(
    values = subdirColors,
    labels = c("CI < 0", "0 in CI", "0 < CI")
  ) +
  theme_bw() +
  theme(legend.position = "top") +
  labs(
    x = "RE Coefficient Rank",
    y = "RE Coefficient Estimate",
    color = "Estimate Grouping"
  )
# Auto-print to render the plot.
contrib_g
|
2024-07-16 15:05:21 +00:00
|
|
|
library(tidyverse)
|
|
|
|
library(plyr)
|
2024-08-24 22:04:46 +00:00
|
|
|
library(gridExtra)
|
|
|
|
library(ggpubr)
|
|
|
|
# script for the analysis of document readability metrics
|
|
|
|
# readability metrics are analyzed while controlling for document length
|
|
|
|
# gaughan@u.northwestern.edu
|
|
|
|
# loading in the data
# Point the working directory at the folder of the active RStudio document so
# the relative CSV paths below resolve; try() lets the session continue if
# that lookup fails.
try(setwd(dirname(rstudioapi::getActiveDocumentContext()$path)))

# Readability metrics for README and CONTRIBUTING documents.
readme_df <- read_csv("../text_analysis/dwo_readability_readme.csv")
contributing_df <- read_csv("../text_analysis/dwo_readability_contributing.csv")
|
|
|
|
# establishing the color scheme
# read_csv() does not produce factor columns, so levels() on the raw column
# is NULL and the palette would end up unnamed. Take the levels from
# as.factor(), which is also how the plots map `subdir` to fill.
subdirColors <- setNames(
  c("#f8f06b", "#ca7780", "#a13795"),
  levels(as.factor(contributing_df$subdir))
)

# README palette: the last two colours of the CONTRIBUTING palette.
readmeSubdirColors <- setNames(
  c("#ca7780", "#a13795"),
  levels(as.factor(readme_df$subdir))
)
|
|
|
|
# plotting linsear scoring
# Overlaid per-subdir densities of the Linsear Write score for README files,
# using ggplot's default colours and restricted to [-30, 30].
readme_linsear_plot <- ggplot(
  readme_df,
  aes(x = linsear_write_formula, group = as.factor(subdir))
) +
  geom_density(
    aes(color = as.factor(subdir), fill = as.factor(subdir)),
    alpha = 0.35,
    position = "identity"
  ) +
  xlim(-30, 30) +
  theme_bw()
|
|
|
|
# plotting readme reading ease
# Overlaid per-subdir densities of Flesch reading ease for README files;
# legends are hidden.
readme_reading_ease <- ggplot(
  readme_df,
  aes(x = flesch_reading_ease, group = as.factor(subdir))
) +
  geom_density(
    aes(fill = as.factor(subdir)),
    alpha = 0.35,
    position = "identity"
  ) +
  scale_fill_manual(values = readmeSubdirColors) +
  xlim(-5, 90) +
  labs(x = "Flesch Reading Ease", y = "README Density") +
  guides(fill = "none", color = "none") +
  theme_bw()
readme_reading_ease
|
|
|
|
# plotting readme reading time
# position = "fill" stacks the per-subdir densities and rescales them to a
# constant total height, so the plot shows relative share at each x.
readme_reading_time_plot <- ggplot(
  readme_df,
  aes(x = reading_time, group = as.factor(subdir))
) +
  geom_density(aes(fill = as.factor(subdir)), alpha = 0.8, position = "fill") +
  scale_fill_manual(values = readmeSubdirColors) +
  xlim(-5, 90) +
  labs(x = "Reading Time (s)", y = NULL) +
  guides(fill = "none", color = "none") +
  theme_bw()
readme_reading_time_plot
|
|
|
|
# Histogram of README reading time across all rows (no subdir grouping).
# NOTE(review): the n in the title is hard-coded; confirm it still matches
# the data being plotted.
readme_reading_time_no_group <- ggplot(readme_df, aes(x = reading_time)) +
  geom_histogram(fill = "forestgreen") +
  xlim(-5, 190) +
  ylab("Count of README Files") +
  xlab("Reading Time (s)") +
  ggtitle("Reading Time for README files from FLOSS Projects (n=2280)") +
  guides(fill = "none", color = "none") +
  theme_bw()
readme_reading_time_no_group
|
|
|
|
# Overlaid per-subdir densities of the Linsear Write score for README files,
# with the README palette and hidden legends.
readme_linsear_plot <- ggplot(
  readme_df,
  aes(x = linsear_write_formula, group = as.factor(subdir))
) +
  geom_density(
    aes(fill = as.factor(subdir)),
    alpha = 0.35,
    position = "identity"
  ) +
  scale_fill_manual(values = readmeSubdirColors) +
  xlim(-5, 90) +
  labs(x = "Linsear Write Score", y = NULL) +
  guides(fill = "none", color = "none") +
  theme_bw()
readme_linsear_plot

# Same layout for the McAlpine EFLAW score (built here but not printed).
readme_mcalpine_eflaw <- ggplot(
  readme_df,
  aes(x = mcalpine_eflaw, group = as.factor(subdir))
) +
  geom_density(
    aes(fill = as.factor(subdir)),
    alpha = 0.35,
    position = "identity"
  ) +
  scale_fill_manual(values = readmeSubdirColors) +
  xlim(-5, 90) +
  labs(x = "McAlpine EFLAW", y = NULL) +
  guides(fill = "none", color = "none") +
  theme_bw()
|
|
|
|
#theme(axis.title.y=element_blank())
|
|
|
|
#plot of reading_ease
|
|
|
|
#readme_df <- readme_df |>
|
|
|
|
# mutate(coef_grouping <- as.factor(subdir))
|
|
|
|
#test_lm <- lm(mcalpine_eflaw ~ word_count + as.factor(subdir),data=readme_df)
|
|
|
|
#summary(test_lm)
|
|
|
|
# Median of columns 3 through 10 of contributing_df within each `subdir` value.
aggregate(contributing_df[, 3:10], list(contributing_df$subdir), median)
|
|
|
|
# plotting contributing linsear writing formula
# Overlaid per-subdir densities; axis titles and legends are suppressed.
contributing_linsear_plot <- ggplot(
  contributing_df,
  aes(x = linsear_write_formula, group = as.factor(subdir))
) +
  geom_density(
    aes(fill = as.factor(subdir)),
    alpha = 0.35,
    position = "identity"
  ) +
  scale_fill_manual(
    values = subdirColors,
    labels = c("CI < 0", "0 in CI", "0 < CI")
  ) +
  labs(x = NULL, y = NULL, fill = "RE Grouping") +
  xlim(-5, 90) +
  theme_bw() +
  guides(fill = "none", color = "none")
|
|
|
|
# plotting contributing reading time
# Overlaid per-subdir densities with a horizontal legend placed inside the
# panel near the top-right corner.
contributing_reading_time_plot <- ggplot(
  contributing_df,
  aes(x = reading_time, group = as.factor(subdir))
) +
  scale_fill_manual(
    values = subdirColors,
    labels = c("CI < 0", "0 in CI", "0 < CI")
  ) +
  geom_density(
    aes(fill = as.factor(subdir)),
    alpha = 0.35,
    position = "identity"
  ) +
  xlim(-5, 90) +
  labs(x = NULL, y = NULL, fill = "RE Grouping") +
  theme_bw() +
  theme(
    legend.position = "inside",
    legend.position.inside = c(.93, .93),
    legend.justification = c("right", "top"),
    legend.direction = "horizontal",
    legend.margin = margin(6, 6, 6, 6)
  )
|
|
|
|
# plotting contributing mcalpine eflaw
# Overlaid per-subdir densities with a vertical legend placed inside the
# panel near the top-right corner.
contributing_mcalpine_eflaw <- ggplot(
  contributing_df,
  aes(x = mcalpine_eflaw, group = as.factor(subdir))
) +
  scale_fill_manual(
    values = subdirColors,
    labels = c("CI < 0", "0 in CI", "0 < CI")
  ) +
  geom_density(
    aes(fill = as.factor(subdir)),
    alpha = 0.35,
    position = "identity"
  ) +
  xlim(-5, 90) +
  labs(x = NULL, y = NULL, fill = "RE Grouping") +
  theme_bw() +
  theme(
    legend.position = "inside",
    legend.position.inside = c(.93, .93),
    legend.justification = c("right", "top"),
    legend.direction = "vertical",
    legend.margin = margin(6, 6, 6, 6)
  )
|
|
|
|
# plotting contributing reading ease
contributing_reading_ease <- ggplot(
  contributing_df,
  aes(x = flesch_reading_ease, group = as.factor(subdir))
) +
  geom_density(
    aes(fill = as.factor(subdir)),
    alpha = 0.35,
    position = "identity"
  ) +
  scale_fill_manual(
    values = subdirColors,
    labels = c("CI < 0", "0 in CI", "0 < CI")
  ) +
  labs(x = NULL, y = "CONTRIBUTING Density", fill = "RE Grouping") +
  xlim(-5, 90) +
  theme_bw() +
  guides(fill = "none", color = "none")
contributing_reading_ease

# Two-row grid: CONTRIBUTING plots on top, the matching README plots below.
grid.arrange(
  contributing_reading_ease,
  contributing_linsear_plot,
  contributing_mcalpine_eflaw,
  readme_reading_ease,
  readme_linsear_plot,
  readme_mcalpine_eflaw,
  nrow = 2
)
|
|
|
|
# Tag each table with its document type, then stack them row-wise.
# rbind() requires both tables to have the same columns.
readme_df$type <- "README"
contributing_df$type <- "CONTRIBUTING"
all_df <- rbind(readme_df, contributing_df)

# Overlaid word-count densities per document type, without outlines.
length_plot_all <- ggplot(
  all_df,
  aes(x = word_count, group = as.factor(type))
) +
  geom_density(
    aes(fill = as.factor(type)),
    color = NA,
    alpha = 0.4,
    position = "identity"
  ) +
  xlim(-10, 500) +
  labs(
    x = "Word Count",
    y = "Density Across Documents",
    fill = "Document Type"
  ) +
  theme_bw() +
  theme(legend.position = "top")
length_plot_all
|
|
|
|
# Two-row grid: CONTRIBUTING plots on top, the matching README plots below.
grid.arrange(
  contributing_reading_ease,
  contributing_linsear_plot,
  contributing_mcalpine_eflaw,
  readme_reading_ease,
  readme_linsear_plot,
  readme_mcalpine_eflaw,
  nrow = 2
)

# Overlaid word-count densities per document type, without outlines.
length_plot_all <- ggplot(
  all_df,
  aes(x = word_count, group = as.factor(type))
) +
  geom_density(
    aes(fill = as.factor(type)),
    color = NA,
    alpha = 0.4,
    position = "identity"
  ) +
  xlim(-10, 500) +
  labs(
    x = "Word Count",
    y = "Density Across Documents",
    fill = "Document Type"
  ) +
  theme_bw() +
  theme(legend.position = "top")
length_plot_all
|
|
|
|
# plotting contributing reading time
# position = "fill" stacks the per-subdir densities and rescales them to a
# constant total height, so the plot shows relative share at each x.
contributing_reading_time_plot <- ggplot(
  contributing_df,
  aes(x = reading_time, group = as.factor(subdir))
) +
  scale_fill_manual(
    values = subdirColors,
    labels = c("CI < 0", "0 in CI", "0 < CI")
  ) +
  geom_density(aes(fill = as.factor(subdir)), position = "fill") +
  xlim(-5, 90) +
  labs(x = NULL, y = NULL, fill = "RE Grouping") +
  theme_bw() +
  theme(
    legend.position = "inside",
    legend.position.inside = c(.93, .93),
    legend.justification = c("right", "top"),
    legend.direction = "horizontal",
    legend.margin = margin(6, 6, 6, 6)
  )
contributing_reading_time_plot

# 2 x 2 grid: CONTRIBUTING plots on top, README plots below.
grid.arrange(
  contributing_reading_ease,
  contributing_reading_time_plot,
  readme_reading_ease,
  readme_reading_time_plot,
  nrow = 2
)
|
|
|
|
# Rebuild three panels with position = "fill" (stacked densities rescaled to
# a constant total height) and redraw the 2 x 2 grid.

# plotting readme reading time
readme_reading_time_plot <- ggplot(
  readme_df,
  aes(x = reading_time, group = as.factor(subdir))
) +
  geom_density(aes(fill = as.factor(subdir)), position = "fill") +
  scale_fill_manual(values = readmeSubdirColors) +
  xlim(-5, 90) +
  labs(x = "Reading Time (s)", y = NULL) +
  guides(fill = "none", color = "none") +
  theme_bw()

# plotting readme reading ease
readme_reading_ease <- ggplot(
  readme_df,
  aes(x = flesch_reading_ease, group = as.factor(subdir))
) +
  geom_density(aes(fill = as.factor(subdir)), position = "fill") +
  scale_fill_manual(values = readmeSubdirColors) +
  xlim(-5, 90) +
  labs(x = "Flesch Reading Ease", y = "README Density") +
  guides(fill = "none", color = "none") +
  theme_bw()

# plotting contributing reading ease
contributing_reading_ease <- ggplot(
  contributing_df,
  aes(x = flesch_reading_ease, group = as.factor(subdir))
) +
  geom_density(aes(fill = as.factor(subdir)), position = "fill") +
  scale_fill_manual(
    values = subdirColors,
    labels = c("CI < 0", "0 in CI", "0 < CI")
  ) +
  labs(x = NULL, y = "CONTRIBUTING Density", fill = "RE Grouping") +
  xlim(-5, 90) +
  theme_bw() +
  guides(fill = "none", color = "none")

grid.arrange(
  contributing_reading_ease,
  contributing_reading_time_plot,
  readme_reading_ease,
  readme_reading_time_plot,
  nrow = 2
)
|
|
|
|
# establishing the color scheme
# read_csv() does not produce factor columns, so levels() on the raw column
# is NULL and the palette would end up unnamed. Take the levels from
# as.factor(), which is also how the plots map `subdir` to fill.
subdirColors <- setNames(
  c("#68293c", "#ffcf67", "#91d8f0"),
  levels(as.factor(contributing_df$subdir))
)

# README palette: the last two colours of the CONTRIBUTING palette.
readmeSubdirColors <- setNames(
  c("#ffcf67", "#91d8f0"),
  levels(as.factor(readme_df$subdir))
)
|
|
|
|
# Rebuild all four panels with position = "fill" so they pick up the current
# palettes, then redraw the 2 x 2 grid.

# plotting readme reading ease
readme_reading_ease <- ggplot(
  readme_df,
  aes(x = flesch_reading_ease, group = as.factor(subdir))
) +
  geom_density(aes(fill = as.factor(subdir)), position = "fill") +
  scale_fill_manual(values = readmeSubdirColors) +
  xlim(-5, 90) +
  labs(x = "Flesch Reading Ease", y = "README Density") +
  guides(fill = "none", color = "none") +
  theme_bw()

# plotting readme reading time
readme_reading_time_plot <- ggplot(
  readme_df,
  aes(x = reading_time, group = as.factor(subdir))
) +
  geom_density(aes(fill = as.factor(subdir)), position = "fill") +
  scale_fill_manual(values = readmeSubdirColors) +
  xlim(-5, 90) +
  labs(x = "Reading Time (s)", y = NULL) +
  guides(fill = "none", color = "none") +
  theme_bw()

# plotting contributing reading time
contributing_reading_time_plot <- ggplot(
  contributing_df,
  aes(x = reading_time, group = as.factor(subdir))
) +
  scale_fill_manual(
    values = subdirColors,
    labels = c("CI < 0", "0 in CI", "0 < CI")
  ) +
  geom_density(aes(fill = as.factor(subdir)), position = "fill") +
  xlim(-5, 90) +
  labs(x = NULL, y = NULL, fill = "RE Grouping") +
  theme_bw() +
  theme(
    legend.position = "inside",
    legend.position.inside = c(.93, .93),
    legend.justification = c("right", "top"),
    legend.direction = "horizontal",
    legend.margin = margin(6, 6, 6, 6)
  )

# plotting contributing reading ease
contributing_reading_ease <- ggplot(
  contributing_df,
  aes(x = flesch_reading_ease, group = as.factor(subdir))
) +
  geom_density(aes(fill = as.factor(subdir)), position = "fill") +
  scale_fill_manual(
    values = subdirColors,
    labels = c("CI < 0", "0 in CI", "0 < CI")
  ) +
  labs(x = NULL, y = "CONTRIBUTING Density", fill = "RE Grouping") +
  xlim(-5, 90) +
  theme_bw() +
  guides(fill = "none", color = "none")

grid.arrange(
  contributing_reading_ease,
  contributing_reading_time_plot,
  readme_reading_ease,
  readme_reading_time_plot,
  nrow = 2
)
|
|
|
|
# Run the readability analysis script; the path is under the user's home
# directory, so it only resolves on a machine with this checkout location.
source("~/Desktop/git/24_deb_gov/R/documentReadabilityAnalysis.R")
|
|
|
|
# Palette keyed by grouping code; the numeric names are coerced to the
# strings "0", "1", "2".
subdirColors <- setNames(
  c("#31449c", "#4a7c85", "#c5db68"),
  c(0, 1, 2)
)
|
|
|
|
# Plot each row of `contrib_groupings` as a confidence-interval range
# (conf.low to conf.high) at its rank, coloured by `ranef_grouping`.
# `contrib_groupings` and `subdirColors` must already exist in the session.
contrib_g <- contrib_groupings |>
  ggplot(aes(x = rank, y = estimate, col = as.factor(ranef_grouping))) +
  geom_linerange(aes(ymin = conf.low, ymax = conf.high)) +
  scale_color_manual(
    values = subdirColors,
    labels = c("CI < 0", "0 in CI", "0 < CI")
  ) +
  theme_bw() +
  theme(legend.position = "top") +
  labs(
    x = "RE Coefficient Rank",
    y = "RE Coefficient Estimate",
    color = "Estimate Grouping"
  )
# Auto-print to render the plot.
contrib_g
|
|
|
|
# Load the groupings table (path is relative to the working directory).
contrib_groupings <- read.csv(
  "../final_data/deb_contrib_interaction_groupings.csv"
)

# Palette keyed by grouping code; the numeric names are coerced to the
# strings "0", "1", "2".
subdirColors <- setNames(
  c("#31449c", "#4a7c85", "#c5db68"),
  c(0, 1, 2)
)

# One confidence-interval range per rank, coloured by `ranef_grouping`.
contrib_g <- contrib_groupings |>
  ggplot(aes(x = rank, y = estimate, col = as.factor(ranef_grouping))) +
  geom_linerange(aes(ymin = conf.low, ymax = conf.high)) +
  scale_color_manual(
    values = subdirColors,
    labels = c("CI < 0", "0 in CI", "0 < CI")
  ) +
  theme_bw() +
  theme(legend.position = "top") +
  labs(
    x = "RE Coefficient Rank",
    y = "RE Coefficient Estimate",
    color = "Estimate Grouping"
  )
contrib_g
|
|
|
|
# Run the GAM plotting script; the code below expects `all_actions_data`
# to exist in the session afterwards.
source("~/Desktop/git/24_deb_gov/R/gam_plot_documents.R")

# Name the two colours by the distinct document types. Passing the whole
# factor column as names would fail, because the names vector must not be
# longer than the two colour values.
doctypeColors <- setNames(
  c("#4a7c85", "#c5db68"),
  levels(factor(all_actions_data$document_type))
)

# Interactive inspection of the data in the viewer.
View(all_actions_data)
|
|
|
|
# Palette keyed by document type.
doctypeColors <- setNames(
  c("#4a7c85", "#c5db68"),
  c("CONTRIBUTING", "README")
)
|
|
|
|
# Smoothed log1p commit count against weekly offset, one curve per document
# type, with a vertical reference line at offset 0.
# The plot maps document type to `color`, so the palette has to be applied
# with scale_color_manual(); a fill scale would be ignored here.
time_plot <- all_actions_data |>
  ggplot(
    aes(x = week_offset, y = log1p_count, color = factor(document_type))
  ) +
  labs(
    x = "Weekly Offset",
    y = "Log Transformed Commit Count",
    color = "Document Type"
  ) +
  scale_color_manual(values = doctypeColors) +
  geom_smooth() +
  geom_vline(xintercept = 0) +
  theme_bw() +
  theme(legend.position = "top")
time_plot
|
|
|
|
# Smoothed log1p commit count against weekly offset, one curve per document
# type, with a vertical reference line at offset 0.
time_plot <- all_actions_data |>
  ggplot(
    aes(x = week_offset, y = log1p_count, color = factor(document_type))
  ) +
  labs(
    x = "Weekly Offset",
    y = "Log Transformed Commit Count",
    color = "Document Type"
  ) +
  scale_color_manual(values = doctypeColors) +
  geom_smooth() +
  geom_vline(xintercept = 0) +
  theme_bw() +
  theme(legend.position = "top")
time_plot
|
|
|
|
# Palette keyed by document type.
doctypeColors <- setNames(
  c("#ffcf67", "#91d8f0"),
  c("CONTRIBUTING", "README")
)
|
|
|
|
# Smoothed log1p commit count against weekly offset, one curve per document
# type, with a vertical reference line at offset 0.
time_plot <- all_actions_data |>
  ggplot(
    aes(x = week_offset, y = log1p_count, color = factor(document_type))
  ) +
  labs(
    x = "Weekly Offset",
    y = "Log Transformed Commit Count",
    color = "Document Type"
  ) +
  scale_color_manual(values = doctypeColors) +
  geom_smooth() +
  geom_vline(xintercept = 0) +
  theme_bw() +
  theme(legend.position = "top")
time_plot
|
|
|
|
# Palette keyed by document type.
doctypeColors <- setNames(
  c("#7d1b16", "#263b90"),
  c("CONTRIBUTING", "README")
)
|
|
|
|
# Smoothed log1p commit count against weekly offset, one curve per document
# type, with a vertical reference line at offset 0.
time_plot <- all_actions_data |>
  ggplot(
    aes(x = week_offset, y = log1p_count, color = factor(document_type))
  ) +
  labs(
    x = "Weekly Offset",
    y = "Log Transformed Commit Count",
    color = "Document Type"
  ) +
  scale_color_manual(values = doctypeColors) +
  geom_smooth() +
  geom_vline(xintercept = 0) +
  theme_bw() +
  theme(legend.position = "top")
time_plot
|
|
|
|
# Palette keyed by document type.
doctypeColors <- setNames(
  c("#995223", "#2464ad"),
  c("CONTRIBUTING", "README")
)
|
|
|
|
# Smoothed log1p commit count against weekly offset, one curve per document
# type, with a vertical reference line at offset 0.
time_plot <- all_actions_data |>
  ggplot(
    aes(x = week_offset, y = log1p_count, color = factor(document_type))
  ) +
  labs(
    x = "Weekly Offset",
    y = "Log Transformed Commit Count",
    color = "Document Type"
  ) +
  scale_color_manual(values = doctypeColors) +
  geom_smooth() +
  geom_vline(xintercept = 0) +
  theme_bw() +
  theme(legend.position = "top")
time_plot
|
|
|
|
# Palette keyed by document type.
doctypeColors <- setNames(
  c("#ba6b44", "#5d7fbd"),
  c("CONTRIBUTING", "README")
)
|
|
|
|
# Smoothed log1p commit count against weekly offset, one curve per document
# type, with a vertical reference line at offset 0.
time_plot <- all_actions_data |>
  ggplot(
    aes(x = week_offset, y = log1p_count, color = factor(document_type))
  ) +
  labs(
    x = "Weekly Offset",
    y = "Log Transformed Commit Count",
    color = "Document Type"
  ) +
  scale_color_manual(values = doctypeColors) +
  geom_smooth() +
  geom_vline(xintercept = 0) +
  theme_bw() +
  theme(legend.position = "top")
time_plot
|
|
|
|
# Palette keyed by document type.
doctypeColors <- setNames(
  c("#5da2d8", "#c7756a"),
  c("CONTRIBUTING", "README")
)
|
|
|
|
# Smoothed log1p commit count against weekly offset, one curve per document
# type, with a vertical reference line at offset 0.
time_plot <- all_actions_data |>
  ggplot(
    aes(x = week_offset, y = log1p_count, color = factor(document_type))
  ) +
  labs(
    x = "Weekly Offset",
    y = "Log Transformed Commit Count",
    color = "Document Type"
  ) +
  scale_color_manual(values = doctypeColors) +
  geom_smooth() +
  geom_vline(xintercept = 0) +
  theme_bw() +
  theme(legend.position = "top")
time_plot
|
|
|
|
# Smoothed commit count against weekly offset, plotting the `count` column
# on a log1p-transformed y axis.
# ggplot2 has no scale_y_log1p(); the transformation is requested through
# scale_y_continuous() instead.
time_plot <- all_actions_data |>
  ggplot(aes(x = week_offset, y = count, color = factor(document_type))) +
  scale_y_continuous(transform = "log1p") +
  labs(
    x = "Weekly Offset",
    y = "Log Transformed Commit Count",
    color = "Document Type"
  ) +
  scale_color_manual(values = doctypeColors) +
  geom_smooth() +
  geom_vline(xintercept = 0) +
  theme_bw() +
  theme(legend.position = "top")
|
|
|
|
# Smoothed `count` against weekly offset on an untransformed axis.
# NOTE(review): the y label still says "Log Transformed" although no
# transformation is applied in this version; confirm which is intended.
time_plot <- all_actions_data |>
  ggplot(aes(x = week_offset, y = count, color = factor(document_type))) +
  labs(
    x = "Weekly Offset",
    y = "Log Transformed Commit Count",
    color = "Document Type"
  ) +
  scale_color_manual(values = doctypeColors) +
  geom_smooth() +
  geom_vline(xintercept = 0) +
  theme_bw() +
  theme(legend.position = "top")
time_plot

# Same plot using the `log1p_count` column instead.
time_plot <- all_actions_data |>
  ggplot(
    aes(x = week_offset, y = log1p_count, color = factor(document_type))
  ) +
  labs(
    x = "Weekly Offset",
    y = "Log Transformed Commit Count",
    color = "Document Type"
  ) +
  scale_color_manual(values = doctypeColors) +
  geom_smooth() +
  geom_vline(xintercept = 0) +
  theme_bw() +
  theme(legend.position = "top")
time_plot
|
|
|
|
# Base plot with y-axis breaks on the log1p scale, labelled with the
# back-transformed values expm1(break) rounded to one decimal.
time_plot <- all_actions_data |>
  ggplot(
    aes(x = week_offset, y = log1p_count, color = factor(document_type))
  ) +
  scale_y_continuous(
    breaks = c(0, 0.5, 1.0, 1.5),
    labels = round(expm1(c(0, 0.5, 1.0, 1.5)), 1)
  )
|
|
|
|
# Smoothed log1p commit count against weekly offset. The y-axis breaks sit
# on the log1p scale and are labelled with the back-transformed values
# expm1(break), rounded to one decimal.
time_plot <- all_actions_data |>
  ggplot(
    aes(x = week_offset, y = log1p_count, color = factor(document_type))
  ) +
  scale_y_continuous(
    breaks = c(0, 0.5, 1.0, 1.5),
    labels = round(expm1(c(0, 0.5, 1.0, 1.5)), 1)
  ) +
  labs(
    x = "Weekly Offset",
    y = "Log Transformed Commit Count",
    color = "Document Type"
  ) +
  scale_color_manual(values = doctypeColors) +
  geom_smooth() +
  geom_vline(xintercept = 0) +
  theme_bw() +
  theme(legend.position = "top")
|
|
|
|
# Smoothed log1p commit count against weekly offset. The y-axis breaks sit
# on the log1p scale and are labelled with the back-transformed values
# expm1(break), rounded to one decimal.
time_plot <- all_actions_data |>
  ggplot(
    aes(x = week_offset, y = log1p_count, color = factor(document_type))
  ) +
  scale_y_continuous(
    breaks = c(0, 0.5, 1.0, 1.5),
    labels = round(expm1(c(0, 0.5, 1.0, 1.5)), 1)
  ) +
  labs(
    x = "Weekly Offset",
    y = "Log Transformed Commit Count",
    color = "Document Type"
  ) +
  scale_color_manual(values = doctypeColors) +
  geom_smooth() +
  geom_vline(xintercept = 0) +
  theme_bw() +
  theme(legend.position = "top")
time_plot
|
|
|
|
# Smoothed log1p commit count against weekly offset. The y-axis labels are
# back-transformed with expm1(), so the axis is titled "Commit Count".
time_plot <- all_actions_data |>
  ggplot(
    aes(x = week_offset, y = log1p_count, color = factor(document_type))
  ) +
  scale_y_continuous(
    breaks = c(0, 0.5, 1.0, 1.5),
    labels = round(expm1(c(0, 0.5, 1.0, 1.5)), 1)
  ) +
  labs(x = "Weekly Offset", y = "Commit Count", color = "Document Type") +
  scale_color_manual(values = doctypeColors) +
  geom_smooth() +
  geom_vline(xintercept = 0) +
  theme_bw() +
  theme(legend.position = "top")
time_plot
|
|
|
|
# Three successive redraws comparing the raw log1p axis with the
# back-transformed tick labels.

# 1) No custom y scale.
# NOTE(review): the axis shows log1p values but is titled "Commit Count";
# confirm the label is intended for this version.
time_plot <- all_actions_data |>
  ggplot(
    aes(x = week_offset, y = log1p_count, color = factor(document_type))
  ) +
  labs(x = "Weekly Offset", y = "Commit Count", color = "Document Type") +
  scale_color_manual(values = doctypeColors) +
  geom_smooth() +
  geom_vline(xintercept = 0) +
  theme_bw() +
  theme(legend.position = "top")
time_plot

# 2) Tick labels back-transformed with expm1().
time_plot <- all_actions_data |>
  ggplot(
    aes(x = week_offset, y = log1p_count, color = factor(document_type))
  ) +
  scale_y_continuous(
    breaks = c(0, 0.5, 1.0, 1.5),
    labels = round(expm1(c(0, 0.5, 1.0, 1.5)), 1)
  ) +
  labs(x = "Weekly Offset", y = "Commit Count", color = "Document Type") +
  scale_color_manual(values = doctypeColors) +
  geom_smooth() +
  geom_vline(xintercept = 0) +
  theme_bw() +
  theme(legend.position = "top")
time_plot

# 3) Back to no custom y scale (same caveat as version 1).
time_plot <- all_actions_data |>
  ggplot(
    aes(x = week_offset, y = log1p_count, color = factor(document_type))
  ) +
  labs(x = "Weekly Offset", y = "Commit Count", color = "Document Type") +
  scale_color_manual(values = doctypeColors) +
  geom_smooth() +
  geom_vline(xintercept = 0) +
  theme_bw() +
  theme(legend.position = "top")
time_plot
|
|
|
|
# Run the readability analysis script (path under the user's home directory),
# then print the word-count plot.
# NOTE(review): presumably `length_plot_all` is created by the sourced
# script; confirm.
source("~/Desktop/git/24_deb_gov/R/documentReadabilityAnalysis.R")
length_plot_all
|
|
|
|
# Overlaid word-count densities per document type, filled from
# `doctypeColors`. Drawn twice to compare fill opacity.

# Fill opacity 0.5.
length_plot_all <- ggplot(
  all_df,
  aes(x = word_count, group = as.factor(type))
) +
  geom_density(
    aes(fill = as.factor(type)),
    color = NA,
    alpha = 0.5,
    position = "identity"
  ) +
  scale_fill_manual(values = doctypeColors) +
  xlim(-10, 500) +
  labs(
    x = "Word Count",
    y = "Density Across Documents",
    fill = "Document Type"
  ) +
  theme_bw() +
  theme(legend.position = "top")
length_plot_all

# Fill opacity 0.6.
length_plot_all <- ggplot(
  all_df,
  aes(x = word_count, group = as.factor(type))
) +
  geom_density(
    aes(fill = as.factor(type)),
    color = NA,
    alpha = 0.6,
    position = "identity"
  ) +
  scale_fill_manual(values = doctypeColors) +
  xlim(-10, 500) +
  labs(
    x = "Word Count",
    y = "Density Across Documents",
    fill = "Document Type"
  ) +
  theme_bw() +
  theme(legend.position = "top")
length_plot_all
|