24_deb_pkg_gov/R/.Rhistory

513 lines
20 KiB
R
Raw Permalink Normal View History

2024-08-24 22:04:46 +00:00
, c(0,1,2) )
# Line-range plot built from contrib_groupings: one vertical range per rank
# running from conf.low to conf.high, coloured by ranef_grouping.
contrib_g <- ggplot(
  contrib_groupings,
  aes(x = rank, y = estimate, colour = as.factor(ranef_grouping))
) +
  geom_linerange(aes(ymin = conf.low, ymax = conf.high)) +
  scale_color_manual(
    values = subdirColors,
    labels = c("CI < 0", "0 in CI", "0 < CI")
  ) +
  labs(
    x = "RE Coefficient Rank",
    y = "RE Coefficient Estimate",
    color = "Estimate Grouping"
  ) +
  # theme_bw() must precede theme() so the legend placement is not reset
  theme_bw() +
  theme(legend.position = "top")
contrib_g
# Palette trial: colours keyed by the grouping codes "0", "1" and "2".
subdirColors <- c("0" = "#942e55", "1" = "#78c58a", "2" = "#9b6e29")
# Rebuild and display the line-range plot with this palette.
contrib_g <- ggplot(
  contrib_groupings,
  aes(x = rank, y = estimate, colour = as.factor(ranef_grouping))
) +
  geom_linerange(aes(ymin = conf.low, ymax = conf.high)) +
  scale_color_manual(
    values = subdirColors,
    labels = c("CI < 0", "0 in CI", "0 < CI")
  ) +
  labs(
    x = "RE Coefficient Rank",
    y = "RE Coefficient Estimate",
    color = "Estimate Grouping"
  ) +
  theme_bw() +
  theme(legend.position = "top")
contrib_g
# Second palette trial, same plot.
subdirColors <- c("0" = "#a1a596", "1" = "#557784", "2" = "#2f6382")
contrib_g <- ggplot(
  contrib_groupings,
  aes(x = rank, y = estimate, colour = as.factor(ranef_grouping))
) +
  geom_linerange(aes(ymin = conf.low, ymax = conf.high)) +
  scale_color_manual(
    values = subdirColors,
    labels = c("CI < 0", "0 in CI", "0 < CI")
  ) +
  labs(
    x = "RE Coefficient Rank",
    y = "RE Coefficient Estimate",
    color = "Estimate Grouping"
  ) +
  theme_bw() +
  theme(legend.position = "top")
contrib_g
# Palette keyed by the grouping codes "0", "1" and "2".
subdirColors <- c("0" = "#a3b0c9", "1" = "#101f31", "2" = "#28578d")
# Line-range plot for readme_groupings; both legends are switched off.
readme_g <- ggplot(
  readme_groupings,
  aes(x = rank, y = estimate, colour = as.factor(ranef_grouping))
) +
  geom_linerange(aes(ymin = conf.low, ymax = conf.high)) +
  scale_color_manual(values = subdirColors) +
  labs(
    x = "RE Coefficient Rank",
    y = "RE Coefficient Estimate",
    color = "Estimate Grouping"
  ) +
  guides(fill = "none", color = "none") +
  theme_bw()
readme_g
# Line-range plot for contrib_groupings using the palette currently stored
# in subdirColors; legend placed on top.
contrib_g <- ggplot(
  contrib_groupings,
  aes(x = rank, y = estimate, colour = as.factor(ranef_grouping))
) +
  geom_linerange(aes(ymin = conf.low, ymax = conf.high)) +
  scale_color_manual(
    values = subdirColors,
    labels = c("CI < 0", "0 in CI", "0 < CI")
  ) +
  labs(
    x = "RE Coefficient Rank",
    y = "RE Coefficient Estimate",
    color = "Estimate Grouping"
  ) +
  theme_bw() +
  theme(legend.position = "top")
contrib_g
# Swap in another palette (keys "0", "1", "2") and redraw the same plot.
subdirColors <- c("0" = "#f8f06b", "1" = "#ca7780", "2" = "#a13795")
contrib_g <- ggplot(
  contrib_groupings,
  aes(x = rank, y = estimate, colour = as.factor(ranef_grouping))
) +
  geom_linerange(aes(ymin = conf.low, ymax = conf.high)) +
  scale_color_manual(
    values = subdirColors,
    labels = c("CI < 0", "0 in CI", "0 < CI")
  ) +
  labs(
    x = "RE Coefficient Rank",
    y = "RE Coefficient Estimate",
    color = "Estimate Grouping"
  ) +
  theme_bw() +
  theme(legend.position = "top")
contrib_g
2024-07-16 15:05:21 +00:00
library(tidyverse)
library(plyr)
2024-08-24 22:04:46 +00:00
library(gridExtra)
library(ggpubr)
# script for the analysis of document readability metrics
# readability metrics will be studied while controlling for document length
# gaughan@u.northwestern.edu
# loading in the data
# Try to switch the working directory to the folder of the document that is
# active in RStudio; try() keeps the session going if that call fails.
try(
  setwd(
    dirname(rstudioapi::getActiveDocumentContext()$path)
  )
)
# Readability tables for the two document types, read via relative paths.
readme_df <- read_csv(
  file = "../text_analysis/dwo_readability_readme.csv"
)
contributing_df <- read_csv(
  file = "../text_analysis/dwo_readability_contributing.csv"
)
# establishing the color scheme
# levels() only returns values for factors. The data frames come straight
# from read_csv(), which does not create factor columns by default, so
# levels(df$subdir) is NULL and the palettes would end up unnamed. Converting
# with as.factor() first yields the same levels the plots use in
# aes(fill = as.factor(subdir)), so each colour is tied to its level by name.
# NOTE(review): assumes contributing_df$subdir has three distinct values and
# readme_df$subdir two -- confirm against the data.
subdirColors <-
  setNames(
    c("#f8f06b", "#ca7780", "#a13795"),
    levels(as.factor(contributing_df$subdir))
  )
readmeSubdirColors <-
  setNames(
    c("#ca7780", "#a13795"),
    levels(as.factor(readme_df$subdir))
  )
# Density of linsear_write_formula in readme_df, one curve per subdir,
# outline and fill both keyed to subdir; x axis limited to [-30, 30].
readme_linsear_plot <- readme_df |>
  ggplot(aes(x = linsear_write_formula, group = as.factor(subdir))) +
  geom_density(
    aes(colour = as.factor(subdir), fill = as.factor(subdir)),
    alpha = 0.35,
    position = "identity"
  ) +
  xlim(-30, 30) +
  theme_bw()
# Overlaid densities of flesch_reading_ease in readme_df, filled per subdir
# with the README palette; legends suppressed.
readme_reading_ease <- readme_df |>
  ggplot(aes(x = flesch_reading_ease, group = as.factor(subdir))) +
  geom_density(
    aes(fill = as.factor(subdir)),
    alpha = 0.35,
    position = "identity"
  ) +
  scale_fill_manual(values = readmeSubdirColors) +
  xlim(-5, 90) +
  labs(x = "Flesch Reading Ease", y = "README Density") +
  guides(fill = "none", color = "none") +
  theme_bw()
readme_reading_ease
# Densities of reading_time in readme_df, filled per subdir and drawn with
# position = "fill"; legends suppressed.
readme_reading_time_plot <- readme_df |>
  ggplot(aes(x = reading_time, group = as.factor(subdir))) +
  geom_density(
    aes(fill = as.factor(subdir)),
    alpha = 0.8,
    position = "fill"
  ) +
  scale_fill_manual(values = readmeSubdirColors) +
  xlim(-5, 90) +
  labs(x = "Reading Time (s)", y = NULL) +
  guides(fill = "none", color = "none") +
  theme_bw()
readme_reading_time_plot
# Ungrouped histogram of reading_time across all of readme_df.
readme_reading_time_no_group <- readme_df |>
  ggplot(aes(x = reading_time)) +
  geom_histogram(fill = "forestgreen") +
  xlim(-5, 190) +
  labs(
    x = "Reading Time (s)",
    y = "Count of README Files",
    title = "Reading Time for README files from FLOSS Projects (n=2280)"
  ) +
  guides(fill = "none", color = "none") +
  theme_bw()
readme_reading_time_no_group
# Overlaid densities of linsear_write_formula in readme_df, filled per subdir
# with the README palette; legends suppressed.
readme_linsear_plot <- readme_df |>
  ggplot(aes(x = linsear_write_formula, group = as.factor(subdir))) +
  geom_density(
    aes(fill = as.factor(subdir)),
    alpha = 0.35,
    position = "identity"
  ) +
  scale_fill_manual(values = readmeSubdirColors) +
  xlim(-5, 90) +
  labs(x = "Linsear Write Score", y = NULL) +
  guides(fill = "none", color = "none") +
  theme_bw()
readme_linsear_plot
# Overlaid densities of mcalpine_eflaw in readme_df, filled per subdir with
# the README palette; legends suppressed.
readme_mcalpine_eflaw <- readme_df |>
  ggplot(aes(x = mcalpine_eflaw, group = as.factor(subdir))) +
  geom_density(
    aes(fill = as.factor(subdir)),
    alpha = 0.35,
    position = "identity"
  ) +
  scale_fill_manual(values = readmeSubdirColors) +
  xlim(-5, 90) +
  labs(x = "McAlpine EFLAW", y = NULL) +
  guides(fill = "none", color = "none") +
  theme_bw()
# Disabled experiments kept for reference:
#theme(axis.title.y=element_blank())
#plot of reading_ease
#readme_df <- readme_df |>
# mutate(coef_grouping <- as.factor(subdir))
#test_lm <- lm(mcalpine_eflaw ~ word_count + as.factor(subdir),data=readme_df)
#summary(test_lm)
# Median of columns 3 through 10 of contributing_df within each subdir value.
aggregate(
  x = contributing_df[, 3:10],
  by = list(contributing_df$subdir),
  FUN = median
)
# Overlaid densities of linsear_write_formula in contributing_df, filled per
# subdir; legends suppressed.
contributing_linsear_plot <- contributing_df |>
  ggplot(aes(x = linsear_write_formula, group = as.factor(subdir))) +
  geom_density(
    aes(fill = as.factor(subdir)),
    alpha = 0.35,
    position = "identity"
  ) +
  scale_fill_manual(
    values = subdirColors,
    labels = c("CI < 0", "0 in CI", "0 < CI")
  ) +
  labs(x = NULL, y = NULL, fill = "RE Grouping") +
  xlim(-5, 90) +
  theme_bw() +
  guides(fill = "none", color = "none")
# Overlaid densities of reading_time in contributing_df, filled per subdir;
# a horizontal legend sits inside the panel near the top-right corner.
contributing_reading_time_plot <- contributing_df |>
  ggplot(aes(x = reading_time, group = as.factor(subdir))) +
  scale_fill_manual(
    values = subdirColors,
    labels = c("CI < 0", "0 in CI", "0 < CI")
  ) +
  geom_density(
    aes(fill = as.factor(subdir)),
    alpha = 0.35,
    position = "identity"
  ) +
  xlim(-5, 90) +
  labs(x = NULL, y = NULL, fill = "RE Grouping") +
  theme_bw() +
  theme(
    legend.position = "inside",
    legend.position.inside = c(.93, .93),
    legend.justification = c("right", "top"),
    legend.direction = "horizontal",
    legend.margin = margin(6, 6, 6, 6)
  )
# Overlaid densities of mcalpine_eflaw in contributing_df, filled per subdir;
# a vertical legend sits inside the panel near the top-right corner.
contributing_mcalpine_eflaw <- contributing_df |>
  ggplot(aes(x = mcalpine_eflaw, group = as.factor(subdir))) +
  scale_fill_manual(
    values = subdirColors,
    labels = c("CI < 0", "0 in CI", "0 < CI")
  ) +
  geom_density(
    aes(fill = as.factor(subdir)),
    alpha = 0.35,
    position = "identity"
  ) +
  xlim(-5, 90) +
  labs(x = NULL, y = NULL, fill = "RE Grouping") +
  theme_bw() +
  theme(
    legend.position = "inside",
    legend.position.inside = c(.93, .93),
    legend.justification = c("right", "top"),
    legend.direction = "vertical",
    legend.margin = margin(6, 6, 6, 6)
  )
# Overlaid densities of flesch_reading_ease in contributing_df, filled per
# subdir; legends suppressed.
contributing_reading_ease <- contributing_df |>
  ggplot(aes(x = flesch_reading_ease, group = as.factor(subdir))) +
  geom_density(
    aes(fill = as.factor(subdir)),
    alpha = 0.35,
    position = "identity"
  ) +
  scale_fill_manual(
    values = subdirColors,
    labels = c("CI < 0", "0 in CI", "0 < CI")
  ) +
  labs(x = NULL, y = "CONTRIBUTING Density", fill = "RE Grouping") +
  xlim(-5, 90) +
  theme_bw() +
  guides(fill = "none", color = "none")
contributing_reading_ease
# Two-row grid: CONTRIBUTING plots first, README plots after.
grid.arrange(
  contributing_reading_ease,
  contributing_linsear_plot,
  contributing_mcalpine_eflaw,
  readme_reading_ease,
  readme_linsear_plot,
  readme_mcalpine_eflaw,
  nrow = 2
)
# Tag each table with its document type, then stack them row-wise.
readme_df$type <- "README"
contributing_df$type <- "CONTRIBUTING"
all_df <- rbind(readme_df, contributing_df)
# Overlaid, outline-free densities of word_count per document type.
length_plot_all <- all_df |>
  ggplot(aes(x = word_count, group = as.factor(type))) +
  geom_density(
    aes(fill = as.factor(type)),
    color = NA,
    alpha = 0.4,
    position = "identity"
  ) +
  xlim(-10, 500) +
  labs(
    x = "Word Count",
    y = "Density Across Documents",
    fill = "Document Type"
  ) +
  theme_bw() +
  theme(legend.position = "top")
length_plot_all
# Two-row grid: CONTRIBUTING plots first, README plots after.
grid.arrange(
  contributing_reading_ease,
  contributing_linsear_plot,
  contributing_mcalpine_eflaw,
  readme_reading_ease,
  readme_linsear_plot,
  readme_mcalpine_eflaw,
  nrow = 2
)
# Overlaid, outline-free densities of word_count per document type.
length_plot_all <- all_df |>
  ggplot(aes(x = word_count, group = as.factor(type))) +
  geom_density(
    aes(fill = as.factor(type)),
    color = NA,
    alpha = 0.4,
    position = "identity"
  ) +
  xlim(-10, 500) +
  labs(
    x = "Word Count",
    y = "Density Across Documents",
    fill = "Document Type"
  ) +
  theme_bw() +
  theme(legend.position = "top")
length_plot_all
# Densities of reading_time in contributing_df drawn with position = "fill",
# filled per subdir; horizontal legend inside the panel.
contributing_reading_time_plot <- contributing_df |>
  ggplot(aes(x = reading_time, group = as.factor(subdir))) +
  scale_fill_manual(
    values = subdirColors,
    labels = c("CI < 0", "0 in CI", "0 < CI")
  ) +
  geom_density(aes(fill = as.factor(subdir)), position = "fill") +
  xlim(-5, 90) +
  labs(x = NULL, y = NULL, fill = "RE Grouping") +
  theme_bw() +
  theme(
    legend.position = "inside",
    legend.position.inside = c(.93, .93),
    legend.justification = c("right", "top"),
    legend.direction = "horizontal",
    legend.margin = margin(6, 6, 6, 6)
  )
contributing_reading_time_plot
# 2 x 2 grid: CONTRIBUTING plots first, README plots after.
grid.arrange(
  contributing_reading_ease,
  contributing_reading_time_plot,
  readme_reading_ease,
  readme_reading_time_plot,
  nrow = 2
)
# README reading time: densities drawn with position = "fill", per subdir.
readme_reading_time_plot <- readme_df |>
  ggplot(aes(x = reading_time, group = as.factor(subdir))) +
  geom_density(aes(fill = as.factor(subdir)), position = "fill") +
  scale_fill_manual(values = readmeSubdirColors) +
  xlim(-5, 90) +
  labs(x = "Reading Time (s)", y = NULL) +
  guides(fill = "none", color = "none") +
  theme_bw()
# README Flesch reading ease: same construction on flesch_reading_ease.
readme_reading_ease <- readme_df |>
  ggplot(aes(x = flesch_reading_ease, group = as.factor(subdir))) +
  geom_density(aes(fill = as.factor(subdir)), position = "fill") +
  scale_fill_manual(values = readmeSubdirColors) +
  xlim(-5, 90) +
  labs(x = "Flesch Reading Ease", y = "README Density") +
  guides(fill = "none", color = "none") +
  theme_bw()
# CONTRIBUTING Flesch reading ease: densities drawn with position = "fill",
# filled per subdir; legends suppressed.
contributing_reading_ease <- contributing_df |>
  ggplot(aes(x = flesch_reading_ease, group = as.factor(subdir))) +
  geom_density(aes(fill = as.factor(subdir)), position = "fill") +
  scale_fill_manual(
    values = subdirColors,
    labels = c("CI < 0", "0 in CI", "0 < CI")
  ) +
  labs(x = NULL, y = "CONTRIBUTING Density", fill = "RE Grouping") +
  xlim(-5, 90) +
  theme_bw() +
  guides(fill = "none", color = "none")
# 2 x 2 grid: CONTRIBUTING plots first, README plots after.
grid.arrange(
  contributing_reading_ease,
  contributing_reading_time_plot,
  readme_reading_ease,
  readme_reading_time_plot,
  nrow = 2
)
# establishing the color scheme
# levels() returns NULL for anything that is not a factor, which would leave
# these palettes unnamed. Going through as.factor() produces the same levels
# the plots use in aes(fill = as.factor(subdir)), so each colour is bound to
# its level by name.
# NOTE(review): assumes subdir is not already a factor here and that
# contributing_df has three distinct subdir values and readme_df two --
# confirm against the data.
subdirColors <-
  setNames(
    c("#68293c", "#ffcf67", "#91d8f0"),
    levels(as.factor(contributing_df$subdir))
  )
readmeSubdirColors <-
  setNames(
    c("#ffcf67", "#91d8f0"),
    levels(as.factor(readme_df$subdir))
  )
# README Flesch reading ease: densities drawn with position = "fill",
# filled per subdir; legends suppressed.
readme_reading_ease <- readme_df |>
  ggplot(aes(x = flesch_reading_ease, group = as.factor(subdir))) +
  geom_density(aes(fill = as.factor(subdir)), position = "fill") +
  scale_fill_manual(values = readmeSubdirColors) +
  xlim(-5, 90) +
  labs(x = "Flesch Reading Ease", y = "README Density") +
  guides(fill = "none", color = "none") +
  theme_bw()
# README reading time: same construction on reading_time.
readme_reading_time_plot <- readme_df |>
  ggplot(aes(x = reading_time, group = as.factor(subdir))) +
  geom_density(aes(fill = as.factor(subdir)), position = "fill") +
  scale_fill_manual(values = readmeSubdirColors) +
  xlim(-5, 90) +
  labs(x = "Reading Time (s)", y = NULL) +
  guides(fill = "none", color = "none") +
  theme_bw()
# CONTRIBUTING reading time: densities drawn with position = "fill", filled
# per subdir; horizontal legend inside the panel near the top-right corner.
contributing_reading_time_plot <- contributing_df |>
  ggplot(aes(x = reading_time, group = as.factor(subdir))) +
  scale_fill_manual(
    values = subdirColors,
    labels = c("CI < 0", "0 in CI", "0 < CI")
  ) +
  geom_density(aes(fill = as.factor(subdir)), position = "fill") +
  xlim(-5, 90) +
  labs(x = NULL, y = NULL, fill = "RE Grouping") +
  theme_bw() +
  theme(
    legend.position = "inside",
    legend.position.inside = c(.93, .93),
    legend.justification = c("right", "top"),
    legend.direction = "horizontal",
    legend.margin = margin(6, 6, 6, 6)
  )
# CONTRIBUTING Flesch reading ease: densities drawn with position = "fill",
# filled per subdir; legends suppressed.
contributing_reading_ease <- contributing_df |>
  ggplot(aes(x = flesch_reading_ease, group = as.factor(subdir))) +
  geom_density(aes(fill = as.factor(subdir)), position = "fill") +
  scale_fill_manual(
    values = subdirColors,
    labels = c("CI < 0", "0 in CI", "0 < CI")
  ) +
  labs(x = NULL, y = "CONTRIBUTING Density", fill = "RE Grouping") +
  xlim(-5, 90) +
  theme_bw() +
  guides(fill = "none", color = "none")
# 2 x 2 grid: CONTRIBUTING plots first, README plots after.
grid.arrange(
  contributing_reading_ease,
  contributing_reading_time_plot,
  readme_reading_ease,
  readme_reading_time_plot,
  nrow = 2
)
# Run the readability analysis script from its location in the home folder.
source(file = "~/Desktop/git/24_deb_gov/R/documentReadabilityAnalysis.R")
# Palette keyed by the grouping codes "0", "1" and "2".
subdirColors <- c("0" = "#31449c", "1" = "#4a7c85", "2" = "#c5db68")
# Line-range plot for contrib_groupings, coloured by ranef_grouping.
contrib_g <- ggplot(
  contrib_groupings,
  aes(x = rank, y = estimate, colour = as.factor(ranef_grouping))
) +
  geom_linerange(aes(ymin = conf.low, ymax = conf.high)) +
  scale_color_manual(
    values = subdirColors,
    labels = c("CI < 0", "0 in CI", "0 < CI")
  ) +
  labs(
    x = "RE Coefficient Rank",
    y = "RE Coefficient Estimate",
    color = "Estimate Grouping"
  ) +
  theme_bw() +
  theme(legend.position = "top")
contrib_g
# Load the contributing groupings table from the final_data folder.
contrib_groupings <- read.csv(
  file = "../final_data/deb_contrib_interaction_groupings.csv"
)
# Palette keyed by the grouping codes "0", "1" and "2".
subdirColors <- c("0" = "#31449c", "1" = "#4a7c85", "2" = "#c5db68")
# Line-range plot for contrib_groupings, coloured by ranef_grouping.
contrib_g <- ggplot(
  contrib_groupings,
  aes(x = rank, y = estimate, colour = as.factor(ranef_grouping))
) +
  geom_linerange(aes(ymin = conf.low, ymax = conf.high)) +
  scale_color_manual(
    values = subdirColors,
    labels = c("CI < 0", "0 in CI", "0 < CI")
  ) +
  labs(
    x = "RE Coefficient Rank",
    y = "RE Coefficient Estimate",
    color = "Estimate Grouping"
  ) +
  theme_bw() +
  theme(legend.position = "top")
contrib_g
# Run the GAM plotting script from its location in the home folder.
source("~/Desktop/git/24_deb_gov/R/gam_plot_documents.R")
# Name the two colours after the distinct document types. Passing the whole
# factor(column) as names supplies one name per row, which setNames() rejects
# once there are more rows than colours; levels() gives one name per type.
# NOTE(review): assumes document_type has exactly two distinct values --
# confirm against the data.
doctypeColors <-
  setNames(
    c("#4a7c85", "#c5db68"),
    levels(factor(all_actions_data$document_type))
  )
# Open the table in the data viewer for inspection.
View(all_actions_data)
# Colours keyed by document type.
doctypeColors <-
  setNames(
    c("#4a7c85", "#c5db68"),
    c("CONTRIBUTING", "README")
  )
# Smoothed log1p_count against week_offset, one curve per document type,
# with a vertical reference line at offset 0.
time_plot <- all_actions_data |>
  ggplot(aes(x = week_offset, y = log1p_count, color = factor(document_type))) +
  labs(
    x = "Weekly Offset",
    y = "Log Transformed Commit Count",
    color = "Document Type"
  ) +
  # The grouping is mapped to `color`, so the palette has to go on the colour
  # scale; a fill scale is ignored because no fill aesthetic is mapped.
  scale_color_manual(values = doctypeColors) +
  geom_smooth() +
  geom_vline(xintercept = 0) +
  theme_bw() +
  theme(legend.position = "top")
time_plot
# Smoothed log1p_count against week_offset, one curve per document type,
# with a vertical reference line at offset 0.
time_plot <- ggplot(
  all_actions_data,
  aes(x = week_offset, y = log1p_count, colour = factor(document_type))
) +
  labs(
    x = "Weekly Offset",
    y = "Log Transformed Commit Count",
    color = "Document Type"
  ) +
  scale_color_manual(values = doctypeColors) +
  geom_smooth() +
  geom_vline(xintercept = 0) +
  theme_bw() +
  theme(legend.position = "top")
time_plot
# Try a different palette and redraw.
doctypeColors <- c(CONTRIBUTING = "#ffcf67", README = "#91d8f0")
time_plot <- ggplot(
  all_actions_data,
  aes(x = week_offset, y = log1p_count, colour = factor(document_type))
) +
  labs(
    x = "Weekly Offset",
    y = "Log Transformed Commit Count",
    color = "Document Type"
  ) +
  scale_color_manual(values = doctypeColors) +
  geom_smooth() +
  geom_vline(xintercept = 0) +
  theme_bw() +
  theme(legend.position = "top")
time_plot
# Palette trial keyed by document type.
doctypeColors <- c(CONTRIBUTING = "#7d1b16", README = "#263b90")
# Smoothed log1p_count against week_offset per document type.
time_plot <- ggplot(
  all_actions_data,
  aes(x = week_offset, y = log1p_count, colour = factor(document_type))
) +
  labs(
    x = "Weekly Offset",
    y = "Log Transformed Commit Count",
    color = "Document Type"
  ) +
  scale_color_manual(values = doctypeColors) +
  geom_smooth() +
  geom_vline(xintercept = 0) +
  theme_bw() +
  theme(legend.position = "top")
time_plot
# Next palette trial, same plot.
doctypeColors <- c(CONTRIBUTING = "#995223", README = "#2464ad")
time_plot <- ggplot(
  all_actions_data,
  aes(x = week_offset, y = log1p_count, colour = factor(document_type))
) +
  labs(
    x = "Weekly Offset",
    y = "Log Transformed Commit Count",
    color = "Document Type"
  ) +
  scale_color_manual(values = doctypeColors) +
  geom_smooth() +
  geom_vline(xintercept = 0) +
  theme_bw() +
  theme(legend.position = "top")
time_plot
# Palette trial keyed by document type.
doctypeColors <- c(CONTRIBUTING = "#ba6b44", README = "#5d7fbd")
# Smoothed log1p_count against week_offset per document type.
time_plot <- ggplot(
  all_actions_data,
  aes(x = week_offset, y = log1p_count, colour = factor(document_type))
) +
  labs(
    x = "Weekly Offset",
    y = "Log Transformed Commit Count",
    color = "Document Type"
  ) +
  scale_color_manual(values = doctypeColors) +
  geom_smooth() +
  geom_vline(xintercept = 0) +
  theme_bw() +
  theme(legend.position = "top")
time_plot
# Next palette trial, same plot.
doctypeColors <- c(CONTRIBUTING = "#5da2d8", README = "#c7756a")
time_plot <- ggplot(
  all_actions_data,
  aes(x = week_offset, y = log1p_count, colour = factor(document_type))
) +
  labs(
    x = "Weekly Offset",
    y = "Log Transformed Commit Count",
    color = "Document Type"
  ) +
  scale_color_manual(values = doctypeColors) +
  geom_smooth() +
  geom_vline(xintercept = 0) +
  theme_bw() +
  theme(legend.position = "top")
time_plot
# Smoothed count against week_offset per document type, with the y axis on a
# log1p scale.
time_plot <- all_actions_data |>
  ggplot(aes(x = week_offset, y = count, color = factor(document_type))) +
  # ggplot2 has no scale_y_log1p(); the log1p transformation is requested
  # through the continuous y scale instead.
  scale_y_continuous(transform = "log1p") +
  labs(
    x = "Weekly Offset",
    y = "Log Transformed Commit Count",
    color = "Document Type"
  ) +
  scale_color_manual(values = doctypeColors) +
  geom_smooth() +
  geom_vline(xintercept = 0) +
  theme_bw() +
  theme(legend.position = "top")
# Smoothed count against week_offset, one curve per document type.
# NOTE(review): y is mapped to `count` while the axis label still reads
# "Log Transformed Commit Count" -- confirm which is intended.
time_plot <- ggplot(
  all_actions_data,
  aes(x = week_offset, y = count, colour = factor(document_type))
) +
  labs(
    x = "Weekly Offset",
    y = "Log Transformed Commit Count",
    color = "Document Type"
  ) +
  scale_color_manual(values = doctypeColors) +
  geom_smooth() +
  geom_vline(xintercept = 0) +
  theme_bw() +
  theme(legend.position = "top")
time_plot
# Smoothed log1p_count against week_offset, one curve per document type,
# with a vertical reference line at offset 0.
time_plot <- ggplot(
  all_actions_data,
  aes(x = week_offset, y = log1p_count, colour = factor(document_type))
) +
  labs(
    x = "Weekly Offset",
    y = "Log Transformed Commit Count",
    color = "Document Type"
  ) +
  scale_color_manual(values = doctypeColors) +
  geom_smooth() +
  geom_vline(xintercept = 0) +
  theme_bw() +
  theme(legend.position = "top")
time_plot
# Base plot of log1p_count against week_offset with y-axis tick labels
# converted back from the log1p scale via expm1() and rounded to one decimal.
# The last label previously read `exp,1(1.5)`, a typo for `expm1(1.5)` that
# fails when evaluated.
time_plot <- all_actions_data |>
  ggplot(aes(x = week_offset, y = log1p_count, color = factor(document_type))) +
  scale_y_continuous(
    breaks = c(0, 0.5, 1.0, 1.5),
    labels = round(c(expm1(0), expm1(0.5), expm1(1.0), expm1(1.5)), 1)
  )
# Smoothed log1p_count against week_offset per document type. Y-axis tick
# labels are converted back from the log1p scale via expm1() and rounded to
# one decimal. The last label previously read `exp,1(1.5)`, a typo for
# `expm1(1.5)` that fails when evaluated.
time_plot <- all_actions_data |>
  ggplot(aes(x = week_offset, y = log1p_count, color = factor(document_type))) +
  scale_y_continuous(
    breaks = c(0, 0.5, 1.0, 1.5),
    labels = round(c(expm1(0), expm1(0.5), expm1(1.0), expm1(1.5)), 1)
  ) +
  labs(
    x = "Weekly Offset",
    y = "Log Transformed Commit Count",
    color = "Document Type"
  ) +
  scale_color_manual(values = doctypeColors) +
  geom_smooth() +
  geom_vline(xintercept = 0) +
  theme_bw() +
  theme(legend.position = "top")
# Smoothed log1p_count against week_offset per document type; y-axis ticks
# at 0, 0.5, 1 and 1.5 are labelled with their expm1() values rounded to one
# decimal.
time_plot <- ggplot(
  all_actions_data,
  aes(x = week_offset, y = log1p_count, colour = factor(document_type))
) +
  scale_y_continuous(
    breaks = c(0, 0.5, 1.0, 1.5),
    labels = round(expm1(c(0, 0.5, 1.0, 1.5)), 1)
  ) +
  labs(
    x = "Weekly Offset",
    y = "Log Transformed Commit Count",
    color = "Document Type"
  ) +
  scale_color_manual(values = doctypeColors) +
  geom_smooth() +
  geom_vline(xintercept = 0) +
  theme_bw() +
  theme(legend.position = "top")
time_plot
# Same plot with the y-axis title changed to "Commit Count".
time_plot <- ggplot(
  all_actions_data,
  aes(x = week_offset, y = log1p_count, colour = factor(document_type))
) +
  scale_y_continuous(
    breaks = c(0, 0.5, 1.0, 1.5),
    labels = round(expm1(c(0, 0.5, 1.0, 1.5)), 1)
  ) +
  labs(
    x = "Weekly Offset",
    y = "Commit Count",
    color = "Document Type"
  ) +
  scale_color_manual(values = doctypeColors) +
  geom_smooth() +
  geom_vline(xintercept = 0) +
  theme_bw() +
  theme(legend.position = "top")
time_plot
# Smoothed log1p_count against week_offset, one curve per document type.
# NOTE(review): y is mapped to `log1p_count` with default tick labels while
# the axis title reads "Commit Count" -- confirm this is intended.
time_plot <- ggplot(
  all_actions_data,
  aes(x = week_offset, y = log1p_count, colour = factor(document_type))
) +
  labs(
    x = "Weekly Offset",
    y = "Commit Count",
    color = "Document Type"
  ) +
  scale_color_manual(values = doctypeColors) +
  geom_smooth() +
  geom_vline(xintercept = 0) +
  theme_bw() +
  theme(legend.position = "top")
time_plot
# Smoothed log1p_count against week_offset per document type; y-axis ticks
# at 0, 0.5, 1 and 1.5 are labelled with their expm1() values rounded to one
# decimal.
time_plot <- ggplot(
  all_actions_data,
  aes(x = week_offset, y = log1p_count, colour = factor(document_type))
) +
  scale_y_continuous(
    breaks = c(0, 0.5, 1.0, 1.5),
    labels = round(expm1(c(0, 0.5, 1.0, 1.5)), 1)
  ) +
  labs(
    x = "Weekly Offset",
    y = "Commit Count",
    color = "Document Type"
  ) +
  scale_color_manual(values = doctypeColors) +
  geom_smooth() +
  geom_vline(xintercept = 0) +
  theme_bw() +
  theme(legend.position = "top")
time_plot
# Smoothed log1p_count against week_offset, one curve per document type.
# NOTE(review): y is mapped to `log1p_count` with default tick labels while
# the axis title reads "Commit Count" -- confirm this is intended.
time_plot <- ggplot(
  all_actions_data,
  aes(x = week_offset, y = log1p_count, colour = factor(document_type))
) +
  labs(
    x = "Weekly Offset",
    y = "Commit Count",
    color = "Document Type"
  ) +
  scale_color_manual(values = doctypeColors) +
  geom_smooth() +
  geom_vline(xintercept = 0) +
  theme_bw() +
  theme(legend.position = "top")
time_plot
# Re-run the readability analysis script, then show the plot it leaves in
# length_plot_all.
source(file = "~/Desktop/git/24_deb_gov/R/documentReadabilityAnalysis.R")
length_plot_all
# Rebuild the word-count density plot with the document-type palette.
length_plot_all <- all_df |>
  ggplot(aes(x = word_count, group = as.factor(type))) +
  geom_density(
    aes(fill = as.factor(type)),
    color = NA,
    alpha = 0.5,
    position = "identity"
  ) +
  scale_fill_manual(values = doctypeColors) +
  xlim(-10, 500) +
  labs(
    x = "Word Count",
    y = "Density Across Documents",
    fill = "Document Type"
  ) +
  theme_bw() +
  theme(legend.position = "top")
length_plot_all
# Overlaid, outline-free densities of word_count per document type, filled
# with the document-type palette at alpha 0.6.
length_plot_all <- all_df |>
  ggplot(aes(x = word_count, group = as.factor(type))) +
  geom_density(
    aes(fill = as.factor(type)),
    color = NA,
    alpha = 0.6,
    position = "identity"
  ) +
  scale_fill_manual(values = doctypeColors) +
  xlim(-10, 500) +
  labs(
    x = "Word Count",
    y = "Density Across Documents",
    fill = "Document Type"
  ) +
  theme_bw() +
  theme(legend.position = "top")
length_plot_all