diff --git a/.RData b/.RData index d9d26e0..f7bb7ff 100644 Binary files a/.RData and b/.RData differ diff --git a/mlm/gam_plot.R b/mlm/gam_plot.R index bdc4a88..085be09 100644 --- a/mlm/gam_plot.R +++ b/mlm/gam_plot.R @@ -1,4 +1,4 @@ - +library(dplyr) contributing_df_filepath <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/CONTRIBUTING_weekly_count_data.csv" contributing_df = read.csv(contributing_df_filepath, header = TRUE) @@ -41,4 +41,4 @@ time_plot <- main_df |> theme(legend.position = "top") time_plot -#ggsave(filename = "plots/cr-020225-gam-introduction.png", plot = time_plot, width = 8, height = 6, dpi = 700) +ggsave(filename = "plots/cr-020325-gam-introduction.png", plot = time_plot, width = 9, height = 9, dpi = 800) diff --git a/plots/cr-0203-contributing-blup.png b/plots/cr-0203-contributing-blup.png new file mode 100644 index 0000000..eb5e589 Binary files /dev/null and b/plots/cr-0203-contributing-blup.png differ diff --git a/plots/cr-0203-ranef-readability.png b/plots/cr-0203-ranef-readability.png new file mode 100644 index 0000000..403b9e9 Binary files /dev/null and b/plots/cr-0203-ranef-readability.png differ diff --git a/plots/cr-0203-wc-density.png b/plots/cr-0203-wc-density.png new file mode 100644 index 0000000..5123a5b Binary files /dev/null and b/plots/cr-0203-wc-density.png differ diff --git a/plots/cr-020325-gam-introduction.png b/plots/cr-020325-gam-introduction.png new file mode 100644 index 0000000..1b8114f Binary files /dev/null and b/plots/cr-020325-gam-introduction.png differ diff --git a/plots/mem_presentation.R b/plots/mem_presentation.R new file mode 100644 index 0000000..f9ab6de --- /dev/null +++ b/plots/mem_presentation.R @@ -0,0 +1,45 @@ +library(tidyverse) +library(texreg) + +readme_rdd <- readRDS("mlm/models/020325_readme_model.rda") +contrib_rdd <- readRDS("mlm/models/020125_contributing_model.rda") + +texreg(list(readme_rdd, contrib_rdd), stars=NULL, digits=3, use.packages=FALSE, + custom.model.names=c( 'README','CONTRIBUTING'), + custom.coef.names=c('(Intercept)', 'Indtroduction', 'Week (Time)', 'Project Age', 'Introduction:Week'), + table=FALSE, ci.force = TRUE) + +readme_groupings <- read.csv('mlm/data/0203_readme_dweek_ranefs.csv') +contrib_groupings <- read.csv('mlm/data/0201_contributing_dweek_ranefs.csv') + +subdirColors <- + setNames( c('#31449c', '#4a7c85', '#c5db68') + , c(0,1,2) ) + +readme_g <- readme_groupings |> + ggplot(aes(x=rank, y=estimate, col = as.factor(ranef_grouping))) + + geom_linerange(aes(ymin= conf.low, ymax= conf.high)) + + scale_color_manual(values = subdirColors) + + guides(fill="none", color="none")+ + theme_bw() + + labs(x="RE Coefficient Rank", y="RE Coefficient Estimate", color="Estimate Grouping") +readme_g + + +contrib_g <- contrib_groupings |> + ggplot(aes(x=rank, y=estimate, col = as.factor(ranef_grouping))) + + geom_linerange(aes(ymin= conf.low, ymax= conf.high)) + + scale_color_manual(values = subdirColors, labels=c('CI < 0', '0 in CI', '0 < CI')) + + theme_bw() + + theme(legend.position = "top") + + labs(x="RE Coefficient Rank", y="RE Coefficient Estimate", color="Estimate Grouping") +contrib_g + +#ggsave(filename = "plots/cr-0203-contributing-blup.png", plot = contrib_g, width = 9, height = 9, dpi = 800) + + +texreg(list(readme_commits_, commits_), stars=NULL, digits=3, use.packages=FALSE, + custom.model.names=c( 'README','Contributing'), + custom.coef.names=c('Topic 1', 'Topic 2', 'Topic 3', 'Topic 4', 'Topic 5', 'Topic 6', 'Topic 7', 'Topic 8'), + table=FALSE, ci.force = TRUE) + diff --git a/plots/text_presentation.R b/plots/text_presentation.R new file mode 100644 index 0000000..c213488 --- /dev/null +++ b/plots/text_presentation.R @@ -0,0 +1,79 @@ +library(tidyverse) +readme_groupings <- read.csv('text_analysis/0203_readme_merged_manifest.csv') +contrib_groupings <- read.csv('text_analysis/0203_contributing_merged_manifest.csv') +contrib_groupings$filename <- contrib_groupings$fvf_filepath +readme_groupings$filename <- readme_groupings$fvf_filepath +readme_textstat <- read.csv('text_analysis/020325_README_readability.csv') +contributing_textstat <- read.csv('text_analysis/020125_CONTRIBUTING_readability.csv') + + +doctypeColors <- + setNames( c('#5da2d8', '#c7756a') + , c("CONTRIBUTING", "README")) +readme_textstat$type = "README" +contributing_textstat$type = "CONTRIBUTING" +all_df = rbind(readme_textstat, contributing_textstat) +length_plot_all <- ggplot(all_df, aes(x=word_count, group=as.factor(type))) + + geom_density(aes(fill = as.factor(type)), color = NA, alpha=0.6, position="identity")+ + scale_fill_manual(values = doctypeColors) + + xlim(-10, 500) + + labs( + x = "Word Count", + y = "Density Across Documents", + fill="Document Type" + ) + + theme_bw() + + theme(legend.position = "top") +length_plot_all + +#ggsave(filename = "plots/cr-0203-wc-density.png", plot = length_plot_all, width = 9, height = 9, dpi = 800) + +contributing_df <- inner_join(contributing_textstat, contrib_groupings, by="filename") +readme_df <- inner_join(readme_textstat, readme_groupings, by="filename") + +subdirColors <- + setNames( c('#31449c', '#4a7c85', '#c5db68') + , c(0,1,2) ) + +contributing_reading_time_plot <- ggplot(contributing_df, aes(x=reading_time, group=as.factor(ranef_grouping))) + + scale_fill_manual(values = subdirColors, labels=c('CI < 0', '0 in CI', '0 < CI')) + + geom_density(aes(fill=as.factor(ranef_grouping)), position="fill") + + xlim(-5, 90) + + labs(x= NULL, y= NULL, fill="RE Grouping")+ + theme_bw() + + theme(legend.position = "inside", + legend.position.inside = c(.90, .90), + legend.justification = c("right", "top"), + legend.direction = "horizontal", + legend.margin = margin(6, 6, 6, 6)) +contributing_reading_time_plot + +contributing_reading_ease <- ggplot(contributing_df, aes(x=flesch_reading_ease, group=as.factor(ranef_grouping))) + + geom_density(aes(fill=as.factor(ranef_grouping)), position="fill") + + scale_fill_manual(values = subdirColors, labels=c('CI < 0', '0 in CI', '0 < CI')) + + labs(x= NULL, y="CONTRIBUTING Density", fill="RE Grouping")+ + xlim(-5, 90) + + theme_bw() + + guides(fill="none", color="none") +#contributing_reading_ease + +readme_reading_time_plot <- ggplot(readme_df, aes(x=reading_time, group=as.factor(ranef_grouping))) + + geom_density(aes(fill=as.factor(ranef_grouping)), position="fill") + + scale_fill_manual(values = subdirColors) + + xlim(-5, 90) + + labs(x= "Reading Time (s)", y= NULL)+ + guides(fill="none", color="none")+ + theme_bw() +#readme_reading_time_plot + +readme_reading_ease <- ggplot(readme_df, aes(x=flesch_reading_ease, group=as.factor(ranef_grouping))) + + geom_density(aes(fill=as.factor(ranef_grouping)), position="fill") + + scale_fill_manual(values = subdirColors) + + xlim(-5, 90) + + labs(x= "Flesch Reading Ease", y= "README Density")+ + guides(fill="none", color="none")+ + theme_bw() +#readme_reading_ease +library(gridExtra) +grid.arrange(contributing_reading_ease, contributing_reading_time_plot, readme_reading_ease, readme_reading_time_plot, nrow = 2) + diff --git a/020325_CONTRIBUTING_commit_topic_model.rda b/topic-outcome-models/020325_README_commit_topic_model.rda similarity index 100% rename from 020325_CONTRIBUTING_commit_topic_model.rda rename to topic-outcome-models/020325_README_commit_topic_model.rda