diff --git a/1107_topic_commitoutcome_contrib.rda b/1107_topic_commitoutcome_contrib.rda new file mode 100644 index 0000000..611ee91 Binary files /dev/null and b/1107_topic_commitoutcome_contrib.rda differ diff --git a/1107_topic_commitoutcome_readme.rda b/1107_topic_commitoutcome_readme.rda new file mode 100644 index 0000000..12f30e9 Binary files /dev/null and b/1107_topic_commitoutcome_readme.rda differ diff --git a/1107_topic_contriboutcome_contrib.rda b/1107_topic_contriboutcome_contrib.rda new file mode 100644 index 0000000..ec04a35 Binary files /dev/null and b/1107_topic_contriboutcome_contrib.rda differ diff --git a/R/contrib_topic_outcomes.R b/R/contrib_topic_outcomes.R index 9b78b98..c535bac 100644 --- a/R/contrib_topic_outcomes.R +++ b/R/contrib_topic_outcomes.R @@ -1,10 +1,8 @@ - -library(stringr) -library(plyr) -contrib_topics_df <- read_csv("../text_analysis/contrib_file_topic_distributions.csv") +library(tidyverse) +contrib_topics_df <- read_csv("text_analysis/contrib_file_topic_distributions.csv") colMeans(subset(contrib_topics_df, select = -filename)) -contrib_df <- read_csv("../final_data/deb_contrib_did.csv") -contrib_pop_df <- read_csv("../final_data/deb_contrib_pop_change.csv") +contrib_df <- read_csv("final_data/deb_contrib_did.csv") +contrib_pop_df <- read_csv("final_data/deb_contrib_pop_change.csv") median(contrib_df$age_in_days) @@ -80,15 +78,22 @@ contrib_total_df$logged_contrib = log1p(contrib_total_df$after_contrib_new) contrib_total_df$logged_commits = log1p(contrib_total_df$summed_count) #running regressions library(MASS) -contrib_ <- glm.nb(logged_contrib ~ t0 + t1 + t2 + t3, data = contrib_total_df) -commits_ <- glm.nb(logged_commits ~ t0 + t1 + t2 + t3, data = contrib_total_df) -qqnorm(residuals(lm1)) +contrib_ <- glm.nb(logged_contrib ~ 0 + t0 + t1 + t2 + t3, data = contrib_total_df) +commits_ <- glm.nb(logged_commits ~ 0 + t0 + t1 + t2 + t3, data = contrib_total_df) +qqnorm(residuals(commits_)) summary(contrib_) summary(commits_) -texreg(list(contrib_, commits_), stars=NULL, digits=3, use.packages=FALSE, - custom.model.names=c( 'Contributions','Commits'), - custom.coef.names=c('(Intercept)', 'Topic 1', 'Topic 2', 'Topic 3'), +library(texreg) +readme_commits_ <- readRDS('1107_topic_commitoutcome_readme.rda') +texreg(list(readme_commits_, commits_), stars=NULL, digits=3, use.packages=FALSE, + custom.model.names=c( 'README','Contributing'), + custom.coef.names=c('Topic 1', 'Topic 2', 'Topic 3', 'Topic 4', 'Topic 5', 'Topic 6', 'Topic 7', 'Topic 8'), table=FALSE, ci.force = TRUE) -saveRDS(commits_, "0731_topic_commitoutcome_contrib.rda") -saveRDS(contrib_, "0731_topic_contriboutcome_contrib.rda") +texreg( commits_, stars=NULL, digits=3, use.packages=FALSE, + custom.model.names=c( 'Commits'), + custom.coef.names=c('Topic 1', 'Topic 2', 'Topic 3', 'Topic 4'), + table=FALSE, ci.force = TRUE) + +saveRDS(commits_, "1107_topic_commitoutcome_contrib.rda") +saveRDS(contrib_, "1107_topic_contriboutcome_contrib.rda") diff --git a/R/readme_topic_outcomes.R b/R/readme_topic_outcomes.R index 67099df..01ca4b2 100644 --- a/R/readme_topic_outcomes.R +++ b/R/readme_topic_outcomes.R @@ -1,10 +1,10 @@ library(stringr) library(tidyverse) -readme_topics_df <- read_csv("../text_analysis/readme_file_topic_distributions.csv") +readme_topics_df <- read_csv("text_analysis/readme_file_topic_distributions.csv") colMeans(subset(readme_topics_df, select = -filename)) -readme_df <- read_csv("../final_data/deb_readme_did.csv") -readme_pop_df <- read_csv("../final_data/deb_readme_pop_change.csv") +readme_df <- read_csv("final_data/deb_readme_did.csv") +readme_pop_df <- read_csv("final_data/deb_readme_pop_change.csv") #get the readmeution count #some preprocessing and expansion @@ -76,18 +76,20 @@ readme_total_df$commit_by_contrib = readme_total_df$summed_count *readme_total_d readme_total_df$logged_outcome = log1p(readme_total_df$commit_by_contrib) readme_total_df$logged_contrib = log1p(readme_total_df$after_contrib_new) readme_total_df$logged_commits = log1p(readme_total_df$summed_count) +readme_total_df$t4t5 = readme_total_df$t4 + readme_total_df$t5 #running regressions library(MASS) -lm1 <- glm.nb(logged_contrib~ t0+t1+t2+t7+t3 +t6 + t5, data = readme_total_df) -qqnorm(residuals(lm1)) -summary(lm1) -#saveRDS(lm1, "0725_topic_contriboutcome_readme.rda") +contrib_ <- glm.nb(logged_contrib~ 0 + t0 + t1 + t2 + t3 + t4 + t5 + t6 + t7, data = readme_total_df) +commits_ <- glm.nb(logged_commits~ 0 + t0 + t1 + t2 + t3 + t4 + t5 + t6 + t7, data = readme_total_df) +qqnorm(residuals(commits_)) +summary(commits_) +saveRDS(commits_, "1107_topic_commitoutcome_readme.rda") contrib_ <- glm.nb(logged_contrib~ t0+t1+t2+t3+ t5 +t6 +t7, data = readme_total_df) commit_ <- glm.nb(logged_commits~ t0+t1+t2+t3+ t5 +t6 +t7, data = readme_total_df) library(texreg) -texreg(list(contrib_, commit_), stars=NULL, digits=3, use.packages=FALSE, - custom.model.names=c( 'Contributions','Commits'), - custom.coef.names=c('(Intercept)', 'Topic 1', 'Topic 2', 'Topic 3', 'Topic 4', 'Topic 6', 'Topic 7', 'Topic 8'), +texreg(commits_, stars=NULL, digits=3, use.packages=FALSE, + custom.model.names=c( 'Commits'), + custom.coef.names=c( 'Topic 1', 'Topic 2', 'Topic 3', 'Topic 4', 'Topic 5', 'Topic 6', 'Topic 7', 'Topic 8'), table=FALSE, ci.force = TRUE)