1
0

update with cresc info and plotting

This commit is contained in:
Matthew Gaughan 2025-02-08 15:15:55 -08:00
parent 212e68a056
commit e8afd485ed
8 changed files with 29 additions and 24 deletions

BIN
.RData

Binary file not shown.

Binary file not shown.

0
intersection.R Normal file
View File

View File

@ -1,17 +1,17 @@
1. SSH tunnel from your workstation using the following command: 1. SSH tunnel from your workstation using the following command:
ssh -N -L 8787:n3436:56811 mjilg@klone.hyak.uw.edu ssh -N -L 8787:n3433:51613 mjilg@klone.hyak.uw.edu
and point your web browser to http://localhost:8787 and point your web browser to http://localhost:8787
2. log in to RStudio Server using the following credentials: 2. log in to RStudio Server using the following credentials:
user: mjilg user: mjilg
password: +g73U+bdF4uygmNdsKEt password: ezvnunJrjaDZsvr0nhPR
When done using RStudio Server, terminate the job by: When done using RStudio Server, terminate the job by:
1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window) 1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window)
2. Issue the following command on the login node: 2. Issue the following command on the login node:
scancel -f 24085748 scancel -f 24092572

View File

@ -2,23 +2,23 @@ library(dplyr)
library(lubridate) library(lubridate)
library(rdd) library(rdd)
contributing_df_filepath <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/CONTRIBUTING_weekly_count_data.csv" contributing_df_filepath <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/metadata/final_0207_CONTRIBUTING_weekly_count_data.csv"
contributing_df = read.csv(contributing_df_filepath, header = TRUE) contributing_df = read.csv(contributing_df_filepath, header = TRUE)
window_num <- 5 window_num <- 5
contributing_df <- contributing_df |> contributing_df <- contributing_df |>
filter(week_index >= (- window_num) & week_index <= (window_num)) |> filter(relative_week >= (- window_num) & relative_week <= (0)) |>
mutate(scaled_age = scale(age)) |> mutate(scaled_age = scale(age)) |>
mutate(scaled_age_at_commit = scale(age_at_commit))|> mutate(scaled_age_at_commit = scale(age_at_commit))|>
mutate(log1p_count = log1p(commit_count)) |> mutate(log1p_count = log1p(commit_count)) |>
mutate(C = ifelse(week_index > -4 & week_index <= 0, 1,0)) mutate(C = ifelse(relative_week > -3 & relative_week <= 0, 1,0))
library(lme4) library(lme4)
library(optimx) library(optimx)
library(lattice) library(lattice)
all_gmodel <- glmer.nb(log1p_count ~ C * week_index + scaled_age + scaled_age_at_commit + (C * week_index | project_id), all_gmodel <- glmer.nb(log1p_count ~ C * relative_week + scaled_age + scaled_age_at_commit + (C * relative_week | project_id),
control=glmerControl(optimizer="bobyqa", control=glmerControl(optimizer="bobyqa",
optCtrl=list(maxfun=2e5)), nAGQ=0, data=contributing_df) optCtrl=list(maxfun=2e5)), nAGQ=0, data=contributing_df)
summary(all_gmodel) summary(all_gmodel)
saveRDS(all_gmodel, "020425_contributing_crescendo_model.rda") saveRDS(all_gmodel, "020725_contributing_crescendo_model.rda")

View File

@ -1,9 +1,9 @@
contributing_count_filepath <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/CONTRIBUTING_weekly_count_data.csv" contributing_count_filepath <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/metadata/final_0207_CONTRIBUTING_weekly_count_data.csv"
contributing_count_df = read.csv(contributing_count_filepath, header = TRUE) contributing_count_df = read.csv(contributing_count_filepath, header = TRUE)
window_num <- 5 window_num <- 5
contributing_count_df <- contributing_count_df |> contributing_count_df <- contributing_count_df |>
filter(week_index >= (- window_num) & week_index <= (window_num)) |> filter(relative_week >= (- window_num) & relative_week <= (window_num)) |>
mutate(scaled_age = scale(age)) |> mutate(scaled_age = scale(age)) |>
mutate(scaled_age_at_commit = scale(age_at_commit))|> mutate(scaled_age_at_commit = scale(age_at_commit))|>
mutate(log1p_count = log1p(commit_count)) mutate(log1p_count = log1p(commit_count))
@ -21,18 +21,19 @@ quantile(aggregate_cccd$avg_weekly_commits, probs = c(0.25, 0.5, 0.75))
quantile(aggregate_cccd$authors_before, probs = c(0.25, 0.5, 0.75)) quantile(aggregate_cccd$authors_before, probs = c(0.25, 0.5, 0.75))
quantile(aggregate_cccd$authors_after, probs = c(0.25, 0.5, 0.75)) quantile(aggregate_cccd$authors_after, probs = c(0.25, 0.5, 0.75))
readme_count_filepath <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/README_weekly_count_data.csv" readme_count_filepath <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/metadata/final_0207_README_weekly_count_data.csv"
readme_count_df = read.csv(readme_count_filepath, header = TRUE) readme_count_df = read.csv(readme_count_filepath, header = TRUE)
window_num <- 5 window_num <- 5
readme_count_df <- readme_count_df |> readme_count_df <- readme_count_df |>
filter(week_index >= (- window_num) & week_index <= (window_num)) |> filter(relative_week >= (- window_num) & relative_week <= (window_num)) |>
mutate(scaled_age = scale(age)) |> mutate(scaled_age = scale(age)) |>
mutate(scaled_age_at_commit = scale(age_at_commit))|> mutate(scaled_age_at_commit = scale(age_at_commit))|>
mutate(log1p_count = log1p(commit_count)) mutate(log1p_count = log1p(commit_count))
quantile(readme_count_df$age / 365, probs = c(0.25, 0.5, 0.75)) quantile(readme_count_df$age / 365, probs = c(0.25, 0.5, 0.75))
quantile(readme_count_df$age_at_commit / 365, probs = c(0.25, 0.5, 0.75)) quantile(readme_count_df$age_at_commit / 365, probs = c(0.25, 0.5, 0.75))
sd(readme_count_df$age_at_commit / 365)
aggregate_rccd <- readme_count_df |> aggregate_rccd <- readme_count_df |>
group_by(project_id) |> group_by(project_id) |>
summarize( summarize(
@ -44,25 +45,30 @@ quantile(aggregate_rccd$avg_weekly_commits, probs = c(0.25, 0.5, 0.75))
quantile(aggregate_rccd$authors_before, probs = c(0.25, 0.5, 0.75)) quantile(aggregate_rccd$authors_before, probs = c(0.25, 0.5, 0.75))
quantile(aggregate_rccd$authors_after, probs = c(0.25, 0.5, 0.75)) quantile(aggregate_rccd$authors_after, probs = c(0.25, 0.5, 0.75))
readme_readability <- read.csv("text_analysis/020325_README_readability.csv", header=TRUE) readme_readability <- read.csv("text_analysis/020725_README_readability.csv", header=TRUE)
median(readme_readability$mcalpine_eflaw) median(readme_readability$mcalpine_eflaw)
median(readme_readability$linsear_write_formula) median(readme_readability$linsear_write_formula)
median(readme_readability$flesch_reading_ease) median(readme_readability$flesch_reading_ease)
median(readme_readability$reading_time)
readme_less_than_10 <- readme_readability |>
filter(reading_time < 10)
contributing_readability <- read.csv("text_analysis/020125_CONTRIBUTING_readability.csv", header=TRUE) contributing_readability <- read.csv("text_analysis/020725_CONTRIBUTING_readability.csv", header=TRUE)
median(contributing_readability$mcalpine_eflaw) median(contributing_readability$mcalpine_eflaw)
median(contributing_readability$linsear_write_formula) median(contributing_readability$linsear_write_formula)
median(contributing_readability$flesch_reading_ease) median(contributing_readability$flesch_reading_ease)
median(contributing_readability$reading_time)
contrib_less_than_10 <- contributing_readability |>
filter(reading_time < 10)
contributing_topic <- read.csv("text_analysis/020725_CONTRIBUTING_file_topic_distributions.csv", header=TRUE)
contributing_topic <- read.csv("text_analysis/020125_CONTRIBUTING_file_topic_distributions.csv", header=TRUE)
quantile(contributing_topic$t0, probs = c(0.25, 0.5, 0.75)) quantile(contributing_topic$t0, probs = c(0.25, 0.5, 0.75))
quantile(contributing_topic$t1, probs = c(0.25, 0.5, 0.75)) quantile(contributing_topic$t1, probs = c(0.25, 0.5, 0.75))
quantile(contributing_topic$t2, probs = c(0.25, 0.5, 0.75)) quantile(contributing_topic$t2, probs = c(0.25, 0.5, 0.75))
quantile(contributing_topic$t3, probs = c(0.25, 0.5, 0.75)) quantile(contributing_topic$t3, probs = c(0.25, 0.5, 0.75))
quantile(contributing_topic$t4, probs = c(0.25, 0.5, 0.75)) quantile(contributing_topic$t4, probs = c(0.25, 0.5, 0.75))
readme_topic <- read.csv("text_analysis/020325_README_file_topic_distributions.csv", header=TRUE) readme_topic <- read.csv("text_analysis/020725_README_file_topic_distributions.csv", header=TRUE)
quantile(readme_topic$t0, probs = c(0.25, 0.5, 0.75)) quantile(readme_topic$t0, probs = c(0.25, 0.5, 0.75))
quantile(readme_topic$t1, probs = c(0.25, 0.5, 0.75)) quantile(readme_topic$t1, probs = c(0.25, 0.5, 0.75))
quantile(readme_topic$t2, probs = c(0.25, 0.5, 0.75)) quantile(readme_topic$t2, probs = c(0.25, 0.5, 0.75))
@ -72,8 +78,6 @@ quantile(readme_topic$t5, probs = c(0.25, 0.5, 0.75))
quantile(readme_topic$t6, probs = c(0.25, 0.5, 0.75)) quantile(readme_topic$t6, probs = c(0.25, 0.5, 0.75))
quantile(readme_topic$t7, probs = c(0.25, 0.5, 0.75)) quantile(readme_topic$t7, probs = c(0.25, 0.5, 0.75))
quantile(readme_topic$t8, probs = c(0.25, 0.5, 0.75)) quantile(readme_topic$t8, probs = c(0.25, 0.5, 0.75))
quantile(readme_topic$t9, probs = c(0.25, 0.5, 0.75))
quantile(readme_topic$t10, probs = c(0.25, 0.5, 0.75))

View File

@ -1,4 +1,5 @@
contributing_manifest <- read.csv("text_analysis/0203_contributing_merged_manifest.csv", header=TRUE) contributing_manifest <- read.csv("text_analysis/0207_contributing_merged_manifest.csv", header=TRUE)
readme_manifest <- read.csv("text_analysis/0203_readme_merged_manifest.csv", header=TRUE) readme_manifest <- read.csv("text_analysis/0207_readme_merged_manifest.csv", header=TRUE)
overlap = inner_join(contributing_manifest, readme_manifest, by="repo_id") overlap = inner_join(contributing_manifest, readme_manifest, by="repo_id")
length(overlap)

View File

@ -1,11 +1,11 @@
library(texreg) library(texreg)
readme_rdd <- readRDS("topic-outcome-models/020325_README_commit_topic_model.rda") readme_rdd <- readRDS("topic-outcome-models/020725_README_commit_topic_model.rda")
contrib_rdd <- readRDS("topic-outcome-models/020325_CONTRIBUTING_commit_topic_model.rda") contrib_rdd <- readRDS("topic-outcome-models/020725_CONTRIBUTING_commit_topic_model.rda")
texreg(readme_rdd, stars=NULL, digits=3, use.packages=FALSE, texreg(readme_rdd, stars=NULL, digits=3, use.packages=FALSE,
custom.model.names=c( 'README'), custom.model.names=c( 'README'),
custom.coef.names=c('Topic 1', 'Topic 2', 'Topic 3', 'Topic 4', 'Topic 5', 'Topic 6', 'Topic 7', 'Topic 8', 'Topic 9', 'Topic 10', 'Topic 11'), custom.coef.names=c('Topic 1', 'Topic 2', 'Topic 3', 'Topic 4', 'Topic 5', 'Topic 6', 'Topic 7', 'Topic 8', 'Topic 9'),
table=FALSE, ci.force = TRUE) table=FALSE, ci.force = TRUE)
texreg(contrib_rdd, stars=NULL, digits=3, use.packages=FALSE, texreg(contrib_rdd, stars=NULL, digits=3, use.packages=FALSE,