1
0

update with cresc info and plotting

This commit is contained in:
Matthew Gaughan 2025-02-08 15:15:55 -08:00
parent 212e68a056
commit e8afd485ed
8 changed files with 29 additions and 24 deletions

BIN
.RData

Binary file not shown.

Binary file not shown.

0
intersection.R Normal file
View File

View File

@ -1,17 +1,17 @@
1. SSH tunnel from your workstation using the following command:
ssh -N -L 8787:n3436:56811 mjilg@klone.hyak.uw.edu
ssh -N -L 8787:n3433:51613 mjilg@klone.hyak.uw.edu
and point your web browser to http://localhost:8787
2. log in to RStudio Server using the following credentials:
user: mjilg
password: +g73U+bdF4uygmNdsKEt
password: ezvnunJrjaDZsvr0nhPR
When done using RStudio Server, terminate the job by:
1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window)
2. Issue the following command on the login node:
scancel -f 24085748
scancel -f 24092572

View File

@ -2,23 +2,23 @@ library(dplyr)
library(lubridate)
library(rdd)
contributing_df_filepath <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/CONTRIBUTING_weekly_count_data.csv"
contributing_df_filepath <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/metadata/final_0207_CONTRIBUTING_weekly_count_data.csv"
contributing_df = read.csv(contributing_df_filepath, header = TRUE)
window_num <- 5
contributing_df <- contributing_df |>
filter(week_index >= (- window_num) & week_index <= (window_num)) |>
filter(relative_week >= (- window_num) & relative_week <= (0)) |>
mutate(scaled_age = scale(age)) |>
mutate(scaled_age_at_commit = scale(age_at_commit))|>
mutate(log1p_count = log1p(commit_count)) |>
mutate(C = ifelse(week_index > -4 & week_index <= 0, 1,0))
mutate(C = ifelse(relative_week > -3 & relative_week <= 0, 1,0))
library(lme4)
library(optimx)
library(lattice)
all_gmodel <- glmer.nb(log1p_count ~ C * week_index + scaled_age + scaled_age_at_commit + (C * week_index | project_id),
all_gmodel <- glmer.nb(log1p_count ~ C * relative_week + scaled_age + scaled_age_at_commit + (C * relative_week | project_id),
control=glmerControl(optimizer="bobyqa",
optCtrl=list(maxfun=2e5)), nAGQ=0, data=contributing_df)
summary(all_gmodel)
saveRDS(all_gmodel, "020425_contributing_crescendo_model.rda")
saveRDS(all_gmodel, "020725_contributing_crescendo_model.rda")

View File

@ -1,9 +1,9 @@
contributing_count_filepath <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/CONTRIBUTING_weekly_count_data.csv"
contributing_count_filepath <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/metadata/final_0207_CONTRIBUTING_weekly_count_data.csv"
contributing_count_df = read.csv(contributing_count_filepath, header = TRUE)
window_num <- 5
contributing_count_df <- contributing_count_df |>
filter(week_index >= (- window_num) & week_index <= (window_num)) |>
filter(relative_week >= (- window_num) & relative_week <= (window_num)) |>
mutate(scaled_age = scale(age)) |>
mutate(scaled_age_at_commit = scale(age_at_commit))|>
mutate(log1p_count = log1p(commit_count))
@ -21,18 +21,19 @@ quantile(aggregate_cccd$avg_weekly_commits, probs = c(0.25, 0.5, 0.75))
quantile(aggregate_cccd$authors_before, probs = c(0.25, 0.5, 0.75))
quantile(aggregate_cccd$authors_after, probs = c(0.25, 0.5, 0.75))
readme_count_filepath <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/README_weekly_count_data.csv"
readme_count_filepath <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/metadata/final_0207_README_weekly_count_data.csv"
readme_count_df = read.csv(readme_count_filepath, header = TRUE)
window_num <- 5
readme_count_df <- readme_count_df |>
filter(week_index >= (- window_num) & week_index <= (window_num)) |>
filter(relative_week >= (- window_num) & relative_week <= (window_num)) |>
mutate(scaled_age = scale(age)) |>
mutate(scaled_age_at_commit = scale(age_at_commit))|>
mutate(log1p_count = log1p(commit_count))
# Quartiles (25th, 50th, 75th percentiles) of the `age` and `age_at_commit`
# columns after dividing by 365 (presumably days -> years; TODO confirm the
# unit of both columns).
# NOTE(review): readme_count_df is the window-filtered weekly table, so these
# statistics are taken over every retained row, not one row per project_id --
# confirm that is intended.
quantile(readme_count_df$age / 365, probs = c(0.25, 0.5, 0.75))
quantile(readme_count_df$age_at_commit / 365, probs = c(0.25, 0.5, 0.75))
# Standard deviation of the same rescaled age_at_commit values.
sd(readme_count_df$age_at_commit / 365)
aggregate_rccd <- readme_count_df |>
group_by(project_id) |>
summarize(
@ -44,25 +45,30 @@ quantile(aggregate_rccd$avg_weekly_commits, probs = c(0.25, 0.5, 0.75))
quantile(aggregate_rccd$authors_before, probs = c(0.25, 0.5, 0.75))
quantile(aggregate_rccd$authors_after, probs = c(0.25, 0.5, 0.75))
readme_readability <- read.csv("text_analysis/020325_README_readability.csv", header=TRUE)
readme_readability <- read.csv("text_analysis/020725_README_readability.csv", header=TRUE)
# Median of each readability column in the README readability table.
# median() is called without na.rm, so any NA in a column yields NA.
median(readme_readability$mcalpine_eflaw)
median(readme_readability$linsear_write_formula)
median(readme_readability$flesch_reading_ease)
median(readme_readability$reading_time)
# Rows whose reading_time is below 10 (unit not shown here -- TODO confirm).
# NOTE(review): filter() must resolve to dplyr::filter for this to work; no
# library(dplyr) call is visible in this part of the script.
readme_less_than_10 <- readme_readability |>
filter(reading_time < 10)
contributing_readability <- read.csv("text_analysis/020125_CONTRIBUTING_readability.csv", header=TRUE)
contributing_readability <- read.csv("text_analysis/020725_CONTRIBUTING_readability.csv", header=TRUE)
# Median of each readability column in the CONTRIBUTING readability table.
# median() is called without na.rm, so any NA in a column yields NA.
median(contributing_readability$mcalpine_eflaw)
median(contributing_readability$linsear_write_formula)
median(contributing_readability$flesch_reading_ease)
median(contributing_readability$reading_time)
# Rows whose reading_time is below 10 (unit not shown here -- TODO confirm).
# NOTE(review): filter() must resolve to dplyr::filter for this to work; no
# library(dplyr) call is visible in this part of the script.
contrib_less_than_10 <- contributing_readability |>
filter(reading_time < 10)
contributing_topic <- read.csv("text_analysis/020125_CONTRIBUTING_file_topic_distributions.csv", header=TRUE)
contributing_topic <- read.csv("text_analysis/020725_CONTRIBUTING_file_topic_distributions.csv", header=TRUE)
# Quartiles (25th, 50th, 75th percentiles) of columns t0 through t4 of the
# CONTRIBUTING file-topic-distribution table, one call per topic column.
quantile(contributing_topic$t0, probs = c(0.25, 0.5, 0.75))
quantile(contributing_topic$t1, probs = c(0.25, 0.5, 0.75))
quantile(contributing_topic$t2, probs = c(0.25, 0.5, 0.75))
quantile(contributing_topic$t3, probs = c(0.25, 0.5, 0.75))
quantile(contributing_topic$t4, probs = c(0.25, 0.5, 0.75))
readme_topic <- read.csv("text_analysis/020325_README_file_topic_distributions.csv", header=TRUE)
readme_topic <- read.csv("text_analysis/020725_README_file_topic_distributions.csv", header=TRUE)
quantile(readme_topic$t0, probs = c(0.25, 0.5, 0.75))
quantile(readme_topic$t1, probs = c(0.25, 0.5, 0.75))
quantile(readme_topic$t2, probs = c(0.25, 0.5, 0.75))
@ -72,8 +78,6 @@ quantile(readme_topic$t5, probs = c(0.25, 0.5, 0.75))
quantile(readme_topic$t6, probs = c(0.25, 0.5, 0.75))
quantile(readme_topic$t7, probs = c(0.25, 0.5, 0.75))
quantile(readme_topic$t8, probs = c(0.25, 0.5, 0.75))
quantile(readme_topic$t9, probs = c(0.25, 0.5, 0.75))
quantile(readme_topic$t10, probs = c(0.25, 0.5, 0.75))

View File

@ -1,4 +1,5 @@
contributing_manifest <- read.csv("text_analysis/0203_contributing_merged_manifest.csv", header=TRUE)
readme_manifest <- read.csv("text_analysis/0203_readme_merged_manifest.csv", header=TRUE)
contributing_manifest <- read.csv("text_analysis/0207_contributing_merged_manifest.csv", header=TRUE)
readme_manifest <- read.csv("text_analysis/0207_readme_merged_manifest.csv", header=TRUE)
# Rows shared by both manifests: an inner join on repo_id keeps only the
# repo_id values that appear in the CONTRIBUTING and the README manifest.
# The dplyr namespace is spelled out because no library(dplyr) call is visible
# in this script, so a bare inner_join() only works if dplyr happens to be
# attached in the session.
overlap <- dplyr::inner_join(
  contributing_manifest,
  readme_manifest,
  by = "repo_id"
)
# Size of the overlap. nrow() counts matched rows; length() on a data frame
# returns the number of columns, which is not the number of shared repos.
# NOTE(review): if repo_id repeats within either manifest the join fans out
# and this overcounts -- confirm there is one row per repository.
nrow(overlap)

View File

@ -1,11 +1,11 @@
library(texreg)
readme_rdd <- readRDS("topic-outcome-models/020325_README_commit_topic_model.rda")
contrib_rdd <- readRDS("topic-outcome-models/020325_CONTRIBUTING_commit_topic_model.rda")
readme_rdd <- readRDS("topic-outcome-models/020725_README_commit_topic_model.rda")
contrib_rdd <- readRDS("topic-outcome-models/020725_CONTRIBUTING_commit_topic_model.rda")
texreg(readme_rdd, stars=NULL, digits=3, use.packages=FALSE,
custom.model.names=c( 'README'),
custom.coef.names=c('Topic 1', 'Topic 2', 'Topic 3', 'Topic 4', 'Topic 5', 'Topic 6', 'Topic 7', 'Topic 8', 'Topic 9', 'Topic 10', 'Topic 11'),
custom.coef.names=c('Topic 1', 'Topic 2', 'Topic 3', 'Topic 4', 'Topic 5', 'Topic 6', 'Topic 7', 'Topic 8', 'Topic 9'),
table=FALSE, ci.force = TRUE)
texreg(contrib_rdd, stars=NULL, digits=3, use.packages=FALSE,