updating with outcome models and plots
This commit is contained in:
parent
4be3640858
commit
212e68a056
@ -1,17 +1,17 @@
|
||||
1. SSH tunnel from your workstation using the following command:
|
||||
|
||||
ssh -N -L 8787:n3434:49157 mjilg@klone.hyak.uw.edu
|
||||
ssh -N -L 8787:n3436:56811 mjilg@klone.hyak.uw.edu
|
||||
|
||||
and point your web browser to http://localhost:8787
|
||||
|
||||
2. Log in to RStudio Server using the following credentials:
|
||||
|
||||
user: mjilg
|
||||
password: shDpem/m/RHo7HO1CuWG
|
||||
password: +g73U+bdF4uygmNdsKEt
|
||||
|
||||
When done using RStudio Server, terminate the job by:
|
||||
|
||||
1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window)
|
||||
2. Issue the following command on the login node:
|
||||
|
||||
scancel -f 24074016
|
||||
scancel -f 24085748
|
BIN
plots/0207-blup-readability-plot.png
Normal file
BIN
plots/0207-blup-readability-plot.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 85 KiB |
BIN
plots/cr-0207-contributing-blup.png
Normal file
BIN
plots/cr-0207-contributing-blup.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 311 KiB |
BIN
plots/cr-0207-wc-density.png
Normal file
BIN
plots/cr-0207-wc-density.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 428 KiB |
@ -1,16 +1,16 @@
|
||||
library(tidyverse)
|
||||
library(texreg)
|
||||
|
||||
readme_rdd <- readRDS("mlm/models/020325_readme_model.rda")
|
||||
contrib_rdd <- readRDS("mlm/models/020125_contributing_model.rda")
|
||||
readme_rdd <- readRDS("mlm/models/020725_readme_model.rda")
|
||||
contrib_rdd <- readRDS("mlm/models/020725_contributing_model.rda")
|
||||
|
||||
# Print one LaTeX regression table for the two models loaded above
# (readme_rdd and contrib_rdd). table = FALSE emits only the tabular body and
# ci.force = TRUE reports confidence intervals in place of standard errors.
texreg(
  list(readme_rdd, contrib_rdd),
  stars = NULL, digits = 3, use.packages = FALSE,
  custom.model.names = c('README', 'CONTRIBUTING'),
  # Display labels replace the model's coefficient names in order.
  # 'Introduction' was previously misspelled 'Indtroduction'.
  custom.coef.names = c('(Intercept)', 'Introduction', 'Week (Time)', 'Project Age', 'Introduction:Week'),
  table = FALSE, ci.force = TRUE
)
|
||||
|
||||
readme_groupings <- read.csv('mlm/data/0203_readme_dweek_ranefs.csv')
|
||||
contrib_groupings <- read.csv('mlm/data/0201_contributing_dweek_ranefs.csv')
|
||||
readme_groupings <- read.csv('mlm/data/0207_readme_dweek_ranefs.csv')
|
||||
contrib_groupings <- read.csv('mlm/data/0207_contributing_dweek_ranefs.csv')
|
||||
|
||||
subdirColors <-
|
||||
setNames( c('#31449c', '#4a7c85', '#c5db68')
|
||||
@ -35,7 +35,7 @@ contrib_g <- contrib_groupings |>
|
||||
labs(x="RE Coefficient Rank", y="RE Coefficient Estimate", color="Estimate Grouping")
|
||||
contrib_g
|
||||
|
||||
#ggsave(filename = "plots/cr-0203-contributing-blup.png", plot = contrib_g, width = 9, height = 9, dpi = 800)
|
||||
ggsave(filename = "plots/cr-0207-contributing-blup.png", plot = contrib_g, width = 9, height = 9, dpi = 800)
|
||||
|
||||
|
||||
texreg(list(readme_commits_, commits_), stars=NULL, digits=3, use.packages=FALSE,
|
||||
|
@ -1,22 +1,24 @@
|
||||
library(tidyverse)
|
||||
readme_groupings <- read.csv('text_analysis/0203_readme_merged_manifest.csv')
|
||||
contrib_groupings <- read.csv('text_analysis/0203_contributing_merged_manifest.csv')
|
||||
readme_groupings <- read.csv('text_analysis/0207_readme_merged_manifest.csv')
|
||||
contrib_groupings <- read.csv('text_analysis/0207_contributing_merged_manifest.csv')
|
||||
contrib_groupings$filename <- contrib_groupings$fvf_filepath
|
||||
readme_groupings$filename <- readme_groupings$fvf_filepath
|
||||
readme_textstat <- read.csv('text_analysis/020325_README_readability.csv')
|
||||
contributing_textstat <- read.csv('text_analysis/020125_CONTRIBUTING_readability.csv')
|
||||
readme_textstat <- read.csv('text_analysis/020725_README_readability.csv')
|
||||
contributing_textstat <- read.csv('text_analysis/020725_CONTRIBUTING_readability.csv')
|
||||
|
||||
|
||||
doctypeColors <-
|
||||
setNames( c('#5da2d8', '#c7756a')
|
||||
, c("CONTRIBUTING", "README"))
|
||||
setNames( c('#c7756a','#5da2d8')
|
||||
, c("README","CONTRIBUTING"))
|
||||
readme_textstat$type = "README"
|
||||
contributing_textstat$type = "CONTRIBUTING"
|
||||
all_df = rbind(readme_textstat, contributing_textstat)
|
||||
all_df = rbind(contributing_textstat,readme_textstat)
|
||||
all_df$type <- factor(all_df$type, levels = c("CONTRIBUTING", "README"))
|
||||
|
||||
length_plot_all <- ggplot(all_df, aes(x=word_count, group=as.factor(type))) +
|
||||
geom_density(aes(fill = as.factor(type)), color = NA, alpha=0.6, position="identity")+
|
||||
scale_fill_manual(values = doctypeColors) +
|
||||
xlim(-10, 500) +
|
||||
xlim(-10, 600) +
|
||||
labs(
|
||||
x = "Word Count",
|
||||
y = "Density Across Documents",
|
||||
@ -26,10 +28,10 @@ length_plot_all <- ggplot(all_df, aes(x=word_count, group=as.factor(type))) +
|
||||
theme(legend.position = "top")
|
||||
length_plot_all
|
||||
|
||||
#ggsave(filename = "plots/cr-0203-wc-density.png", plot = length_plot_all, width = 9, height = 9, dpi = 800)
|
||||
#ggsave(filename = "plots/cr-0207-wc-density.png", plot = length_plot_all, width = 9, height = 9, dpi = 800)
|
||||
|
||||
contributing_df <- inner_join(contributing_textstat, contrib_groupings, by="filename")
|
||||
readme_df <- inner_join(readme_textstat, readme_groupings, by="filename")
|
||||
contributing_df <- inner_join(contributing_textstat, contrib_groupings, by=c("filename"="new_filepath"))
|
||||
readme_df <- inner_join(readme_textstat, readme_groupings, by=c("filename"="new_filepath"))
|
||||
|
||||
subdirColors <-
|
||||
setNames( c('#31449c', '#4a7c85', '#c5db68')
|
||||
@ -42,7 +44,7 @@ contributing_reading_time_plot <- ggplot(contributing_df, aes(x=reading_time, gr
|
||||
labs(x= NULL, y= NULL, fill="RE Grouping")+
|
||||
theme_bw() +
|
||||
theme(legend.position = "inside",
|
||||
legend.position.inside = c(.90, .90),
|
||||
legend.position.inside = c(.89, .92),
|
||||
legend.justification = c("right", "top"),
|
||||
legend.direction = "horizontal",
|
||||
legend.margin = margin(6, 6, 6, 6))
|
||||
@ -64,7 +66,7 @@ readme_reading_time_plot <- ggplot(readme_df, aes(x=reading_time, group=as.facto
|
||||
labs(x= "Reading Time (s)", y= NULL)+
|
||||
guides(fill="none", color="none")+
|
||||
theme_bw()
|
||||
#readme_reading_time_plot
|
||||
readme_reading_time_plot
|
||||
|
||||
readme_reading_ease <- ggplot(readme_df, aes(x=flesch_reading_ease, group=as.factor(ranef_grouping))) +
|
||||
geom_density(aes(fill=as.factor(ranef_grouping)), position="fill") +
|
||||
@ -73,7 +75,7 @@ readme_reading_ease <- ggplot(readme_df, aes(x=flesch_reading_ease, group=as.fac
|
||||
labs(x= "Flesch Reading Ease", y= "README Density")+
|
||||
guides(fill="none", color="none")+
|
||||
theme_bw()
|
||||
#readme_reading_ease
|
||||
readme_reading_ease
|
||||
library(gridExtra)
|
||||
grid.arrange(contributing_reading_ease, contributing_reading_time_plot, readme_reading_ease, readme_reading_time_plot, nrow = 2)
|
||||
|
||||
|
Binary file not shown.
Binary file not shown.
BIN
topic-outcome-models/020725_CONTRIBUTING_commit_topic_model.rda
Normal file
BIN
topic-outcome-models/020725_CONTRIBUTING_commit_topic_model.rda
Normal file
Binary file not shown.
BIN
topic-outcome-models/020725_README_commit_topic_model.rda
Normal file
BIN
topic-outcome-models/020725_README_commit_topic_model.rda
Normal file
Binary file not shown.
@ -3,15 +3,20 @@ library(lubridate)
|
||||
library(rdd)
|
||||
library(stringr)
|
||||
|
||||
contributing_count_data_filepath <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/CONTRIBUTING_weekly_count_data.csv"
|
||||
contributing_count_data_filepath <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/metadata/final_0207_CONTRIBUTING_weekly_count_data.csv"
|
||||
contributing_count_df = read.csv(contributing_count_data_filepath, header = TRUE)
|
||||
|
||||
contributing_topic_dist_filepath <- "text_analysis/020125_CONTRIBUTING_file_topic_distributions.csv"
|
||||
contributing_topic_dist_filepath <- "text_analysis/020725_CONTRIBUTING_file_topic_distributions.csv"
|
||||
contributing_topics_df = read.csv(contributing_topic_dist_filepath, header = TRUE)
|
||||
|
||||
contributing_merged_manifest <- "text_analysis/0207_contributing_merged_manifest.csv"
|
||||
contributing_manifest_df <- read.csv(contributing_merged_manifest, header=TRUE)
|
||||
|
||||
merged_df <- inner_join(contributing_manifest_df, contributing_topics_df, by=c("new_filepath"= "filename"))
|
||||
|
||||
window_num <- 5
|
||||
contributing_count_df <- contributing_count_df |>
|
||||
filter(week_index >= (- window_num) & week_index <= (window_num)) |>
|
||||
filter(relative_week >= (- window_num) & relative_week <= (window_num)) |>
|
||||
mutate(scaled_age = scale(age)) |>
|
||||
mutate(scaled_age_at_commit = scale(age_at_commit))|>
|
||||
mutate(log1p_count = log1p(commit_count))
|
||||
@ -21,12 +26,7 @@ summed_data <- contributing_count_df |>
|
||||
group_by(project_id) |>
|
||||
summarise_at(vars(commit_count), list(summed_count=sum))
|
||||
|
||||
contributing_topics_df <- contributing_topics_df |>
|
||||
mutate(project_id = sapply(str_split(filename, "_hullabaloo_"), `[`, 1)) |>
|
||||
mutate(project_id = ifelse(filename=="_vcr_vcr_CONTRIBUTING.md", "vcr_vcr", project_id)) |>
|
||||
mutate(project_id = ifelse(filename=="marshmallow-code_marshmallow.git_CONTRIBUTING.rst", "marshmallow-code_marshmallow.git", project_id))
|
||||
|
||||
merged_df <- inner_join(summed_data, contributing_topics_df, by="project_id")
|
||||
merged_df <- inner_join(summed_data, merged_df, by=c("project_id" = "repo_id"))
|
||||
merged_df$logged_commits <- log1p(merged_df$summed_count)
|
||||
|
||||
library(MASS)
|
||||
@ -34,4 +34,4 @@ library(MASS)
|
||||
# Fit a negative-binomial GLM of per-project commit totals on the five topic
# weights t0..t4, with no intercept (the `0 +` term).
# NOTE(review): logged_commits is log1p(summed_count), i.e. non-integer, while
# glm.nb is a count model -- confirm this is intended rather than modelling
# summed_count directly.
commit_outcome_model <- glm.nb(logged_commits ~ 0 + t0 + t1 + t2 + t3 + t4, data=merged_df)
|
||||
qqnorm(residuals(commit_outcome_model))
|
||||
summary(commit_outcome_model)
|
||||
saveRDS(commit_outcome_model, "020325_commit_topic_model.rda")
|
||||
# Persist the fitted CONTRIBUTING topic model. The file name now carries the
# document type so it cannot be confused with (or overwritten by) the README
# script's output, which is saved as "020725_README_commit_topic_model.rda".
saveRDS(commit_outcome_model, "020725_CONTRIBUTING_commit_topic_model.rda")
|
||||
|
@ -3,15 +3,20 @@ library(lubridate)
|
||||
library(rdd)
|
||||
library(stringr)
|
||||
|
||||
readme_count_data_filepath <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/README_weekly_count_data.csv"
|
||||
readme_count_data_filepath <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/metadata/final_0207_README_weekly_count_data.csv"
|
||||
readme_count_df = read.csv(readme_count_data_filepath, header = TRUE)
|
||||
|
||||
readme_topic_dist_filepath <- "text_analysis/020325_README_file_topic_distributions.csv"
|
||||
readme_topic_dist_filepath <- "text_analysis/020725_README_file_topic_distributions.csv"
|
||||
readme_topics_df = read.csv(readme_topic_dist_filepath, header = TRUE)
|
||||
|
||||
readme_merged_manifest <- "text_analysis/0207_readme_merged_manifest.csv"
|
||||
readme_manifest_df <- read.csv(readme_merged_manifest, header=TRUE)
|
||||
|
||||
merged_df <- inner_join(readme_manifest_df, readme_topics_df, by=c("new_filepath"= "filename"))
|
||||
|
||||
window_num <- 5
|
||||
readme_count_df <- readme_count_df |>
|
||||
filter(week_index >= (- window_num) & week_index <= (window_num)) |>
|
||||
filter(relative_week >= (- window_num) & relative_week <= (window_num)) |>
|
||||
mutate(scaled_age = scale(age)) |>
|
||||
mutate(scaled_age_at_commit = scale(age_at_commit))|>
|
||||
mutate(log1p_count = log1p(commit_count))
|
||||
@ -21,19 +26,15 @@ summed_data <- readme_count_df |>
|
||||
group_by(project_id) |>
|
||||
summarise_at(vars(commit_count), list(summed_count=sum))
|
||||
|
||||
readme_topics_df <- readme_topics_df |>
|
||||
mutate(project_id = sapply(str_split(filename, "_hullabaloo_"), `[`, 1)) |>
|
||||
mutate(project_id = ifelse(filename=="jaraco_keyrings.alt_hullabaloo_README.rst", "jaraco_keyrings.alt", project_id)) |>
|
||||
mutate(project_id = ifelse(filename=="_vcr_vcr_README.md", "vcr_vcr", project_id))
|
||||
|
||||
|
||||
#loss of jaraco_keyring, though jaraco keyrings.alt is represented
|
||||
merged_df <- inner_join(summed_data, readme_topics_df, by="project_id")
|
||||
merged_df <- inner_join(summed_data, merged_df, by=c("project_id" = "repo_id"))
|
||||
|
||||
merged_df$logged_commits <- log1p(merged_df$summed_count)
|
||||
|
||||
library(MASS)
|
||||
commit_outcome_model <- glm.nb(logged_commits ~ 0 + t0 + t1 + t2 + t3 + t4 + t5 + t6 + t7 + t8 + t9 + t10, data=merged_df)
|
||||
# Fit a negative-binomial GLM of per-project commit totals on the nine topic
# weights t0..t8, with no intercept (the `0 +` term).
# NOTE(review): logged_commits is log1p(summed_count), i.e. non-integer, while
# glm.nb is a count model -- confirm this is intended rather than modelling
# summed_count directly.
commit_outcome_model <- glm.nb(logged_commits ~ 0 + t0 + t1 + t2 + t3 + t4 + t5 + t6 + t7 + t8, data=merged_df)
|
||||
qqnorm(residuals(commit_outcome_model))
|
||||
summary(commit_outcome_model)
|
||||
|
||||
saveRDS(commit_outcome_model, "020325_CONTRIBUTING_commit_topic_model.rda")
|
||||
saveRDS(commit_outcome_model, "020725_README_commit_topic_model.rda")
|
||||
|
Loading…
Reference in New Issue
Block a user