updates to topic_outcome models

This commit is contained in:
Matthew Gaughan 2024-11-07 14:34:50 -06:00
parent 9ebad53df9
commit d22992ee5e
5 changed files with 31 additions and 24 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1,10 +1,8 @@
# Setup for the CONTRIBUTING-file topic analysis: load packages, read the
# per-file topic distributions and the two project-level CSVs, and print a
# couple of quick summaries.
# NOTE(review): this span is a rendered diff hunk, so several statements appear
# twice (once with a "../" path prefix, once without). Presumably the "../"
# versions are the removed lines -- confirm against the actual script.
library(stringr)
library(plyr)
# NOTE(review): as displayed, this read_csv() call precedes library(tidyverse);
# it only works if readr is already attached -- likely a removed line.
contrib_topics_df <- read_csv("../text_analysis/contrib_file_topic_distributions.csv")
library(tidyverse)
# Same object read again from a path relative to the working directory; when
# both lines run, this later assignment is the one that sticks.
contrib_topics_df <- read_csv("text_analysis/contrib_file_topic_distributions.csv")
# Column means over every column except `filename`.
colMeans(subset(contrib_topics_df, select = -filename))
contrib_df <- read_csv("../final_data/deb_contrib_did.csv")
contrib_pop_df <- read_csv("../final_data/deb_contrib_pop_change.csv")
# Working-directory-relative versions of the two reads above.
contrib_df <- read_csv("final_data/deb_contrib_did.csv")
contrib_pop_df <- read_csv("final_data/deb_contrib_pop_change.csv")
# Median of the age_in_days column of contrib_df.
median(contrib_df$age_in_days)
@ -80,15 +78,22 @@ contrib_total_df$logged_contrib = log1p(contrib_total_df$after_contrib_new)
# Outcome used by the commit models: log1p() of summed_count.
contrib_total_df$logged_commits = log1p(contrib_total_df$summed_count)
#running regressions
library(MASS)
# Negative-binomial fits of each logged outcome on topics t0..t3, with the
# default intercept.
# NOTE(review): glm.nb() is a count model, while both outcomes here are
# log1p-transformed (so generally non-integer) -- confirm this is intended.
contrib_ <- glm.nb(logged_contrib ~ t0 + t1 + t2 + t3, data = contrib_total_df)
commits_ <- glm.nb(logged_commits ~ t0 + t1 + t2 + t3, data = contrib_total_df)
# NOTE(review): `lm1` is not defined anywhere in the visible hunks of this
# script; the qqnorm() call a few lines below uses commits_ instead.
qqnorm(residuals(lm1))
# Same two models refit with `0 +`, i.e. without an intercept, so each topic
# gets its own coefficient. Presumably the replacement for the fits above.
contrib_ <- glm.nb(logged_contrib ~ 0 + t0 + t1 + t2 + t3, data = contrib_total_df)
commits_ <- glm.nb(logged_commits ~ 0 + t0 + t1 + t2 + t3, data = contrib_total_df)
# Normal Q-Q plot of the commit model's residuals, then coefficient summaries.
qqnorm(residuals(commits_))
summary(contrib_)
summary(commits_)
# NOTE(review): the texreg() call that starts on the next line has no closing
# arguments of its own in this view and is followed by library(texreg);
# presumably these three lines are the removed version of the call -- confirm.
texreg(list(contrib_, commits_), stars=NULL, digits=3, use.packages=FALSE,
custom.model.names=c( 'Contributions','Commits'),
custom.coef.names=c('(Intercept)', 'Topic 1', 'Topic 2', 'Topic 3'),
library(texreg)
readme_commits_ <- readRDS('1107_topic_commitoutcome_readme.rda')
texreg(list(readme_commits_, commits_), stars=NULL, digits=3, use.packages=FALSE,
custom.model.names=c( 'README','Contributing'),
custom.coef.names=c('Topic 1', 'Topic 2', 'Topic 3', 'Topic 4', 'Topic 5', 'Topic 6', 'Topic 7', 'Topic 8'),
table=FALSE, ci.force = TRUE)
saveRDS(commits_, "0731_topic_commitoutcome_contrib.rda")
saveRDS(contrib_, "0731_topic_contriboutcome_contrib.rda")
texreg( commits_, stars=NULL, digits=3, use.packages=FALSE,
custom.model.names=c( 'Commits'),
custom.coef.names=c('Topic 1', 'Topic 2', 'Topic 3', 'Topic 4'),
table=FALSE, ci.force = TRUE)
saveRDS(commits_, "1107_topic_commitoutcome_contrib.rda")
saveRDS(contrib_, "1107_topic_contriboutcome_contrib.rda")
# Notes on the block from library(texreg) down to here:
# - readRDS() loads a model from '1107_topic_commitoutcome_readme.rda'; a file
#   of that name is written by saveRDS() in the README-topics script shown
#   later in this diff.
# - The two-model texreg() call labels the columns README / Contributing and
#   supplies eight coefficient names ('Topic 1' is the label for t0, and so on).
# - The single-model texreg() call reports commits_ alone with four topic names.
# - The two fitted models are saved under both a "0731_" and a "1107_" prefix;
#   presumably the "0731_" pair are removed lines -- confirm.
# - These files are written with saveRDS(), so they must be read back with
#   readRDS() (not load()) despite the ".rda" extension.

View File

@ -1,10 +1,10 @@
# Setup for the README-file topic analysis: load packages, read the per-file
# topic distributions and the two project-level CSVs.
# NOTE(review): this span is a rendered diff hunk, so each read_csv() appears
# twice (with and without a "../" prefix). Presumably the "../" versions are
# the removed lines -- confirm against the actual script.
library(stringr)
library(tidyverse)
readme_topics_df <- read_csv("../text_analysis/readme_file_topic_distributions.csv")
# Same object read again from a path relative to the working directory; when
# both lines run, this later assignment is the one that sticks.
readme_topics_df <- read_csv("text_analysis/readme_file_topic_distributions.csv")
# Column means over every column except `filename`.
colMeans(subset(readme_topics_df, select = -filename))
readme_df <- read_csv("../final_data/deb_readme_did.csv")
readme_pop_df <- read_csv("../final_data/deb_readme_pop_change.csv")
# Working-directory-relative versions of the two reads above.
readme_df <- read_csv("final_data/deb_readme_did.csv")
readme_pop_df <- read_csv("final_data/deb_readme_pop_change.csv")
# get the readme count
# NOTE(review): the comment above originally read "readmeution", apparently a
# find/replace artifact of "contribution" -- confirm what is being counted.
# some preprocessing and expansion
@ -76,18 +76,20 @@ readme_total_df$commit_by_contrib = readme_total_df$summed_count *readme_total_d
# log1p-transformed outcomes: commits-by-contributors, contributors, commits.
readme_total_df$logged_outcome = log1p(readme_total_df$commit_by_contrib)
readme_total_df$logged_contrib = log1p(readme_total_df$after_contrib_new)
readme_total_df$logged_commits = log1p(readme_total_df$summed_count)
# Combined share of topics t4 and t5.
# NOTE(review): t4t5 is not used by any formula visible in this hunk.
readme_total_df$t4t5 = readme_total_df$t4 + readme_total_df$t5
#running regressions
library(MASS)
# NOTE(review): glm.nb() is a count model, while the outcomes used below are
# log1p-transformed (so generally non-integer) -- confirm this is intended.
# lm1: contributor outcome on seven topics (t4 left out) with an intercept.
lm1 <- glm.nb(logged_contrib~ t0+t1+t2+t7+t3 +t6 + t5, data = readme_total_df)
qqnorm(residuals(lm1))
summary(lm1)
#saveRDS(lm1, "0725_topic_contriboutcome_readme.rda")
# No-intercept (`0 +`) fits on all eight topics t0..t7, one per outcome.
contrib_ <- glm.nb(logged_contrib~ 0 + t0 + t1 + t2 + t3 + t4 + t5 + t6 + t7, data = readme_total_df)
commits_ <- glm.nb(logged_commits~ 0 + t0 + t1 + t2 + t3 + t4 + t5 + t6 + t7, data = readme_total_df)
qqnorm(residuals(commits_))
summary(commits_)
# Persist the commit model; the contributing-topics script shown earlier in
# this diff reads a file of this name with readRDS().
saveRDS(commits_, "1107_topic_commitoutcome_readme.rda")
# NOTE(review): the next two fits reassign contrib_ (with an intercept and
# without t4) and create `commit_` (singular), a different name from
# `commits_` above. Presumably these are removed lines -- confirm.
contrib_ <- glm.nb(logged_contrib~ t0+t1+t2+t3+ t5 +t6 +t7, data = readme_total_df)
commit_ <- glm.nb(logged_commits~ t0+t1+t2+t3+ t5 +t6 +t7, data = readme_total_df)
library(texreg)
# NOTE(review): the texreg(list(contrib_, commit_), ...) call below has no
# closing arguments of its own in this view; presumably it is the removed
# version of the single-model call that follows it -- confirm.
texreg(list(contrib_, commit_), stars=NULL, digits=3, use.packages=FALSE,
custom.model.names=c( 'Contributions','Commits'),
custom.coef.names=c('(Intercept)', 'Topic 1', 'Topic 2', 'Topic 3', 'Topic 4', 'Topic 6', 'Topic 7', 'Topic 8'),
texreg(commits_, stars=NULL, digits=3, use.packages=FALSE,
custom.model.names=c( 'Commits'),
custom.coef.names=c( 'Topic 1', 'Topic 2', 'Topic 3', 'Topic 4', 'Topic 5', 'Topic 6', 'Topic 7', 'Topic 8'),
table=FALSE, ci.force = TRUE)