updates to topic_outcome models
This commit is contained in:
parent
9ebad53df9
commit
d22992ee5e
BIN
1107_topic_commitoutcome_contrib.rda
Normal file
BIN
1107_topic_commitoutcome_contrib.rda
Normal file
Binary file not shown.
BIN
1107_topic_commitoutcome_readme.rda
Normal file
BIN
1107_topic_commitoutcome_readme.rda
Normal file
Binary file not shown.
BIN
1107_topic_contriboutcome_contrib.rda
Normal file
BIN
1107_topic_contriboutcome_contrib.rda
Normal file
Binary file not shown.
@ -1,10 +1,8 @@
|
|||||||
|
library(tidyverse)
|
||||||
library(stringr)
|
contrib_topics_df <- read_csv("text_analysis/contrib_file_topic_distributions.csv")
|
||||||
library(plyr)
|
|
||||||
contrib_topics_df <- read_csv("../text_analysis/contrib_file_topic_distributions.csv")
|
|
||||||
colMeans(subset(contrib_topics_df, select = -filename))
|
colMeans(subset(contrib_topics_df, select = -filename))
|
||||||
contrib_df <- read_csv("../final_data/deb_contrib_did.csv")
|
contrib_df <- read_csv("final_data/deb_contrib_did.csv")
|
||||||
contrib_pop_df <- read_csv("../final_data/deb_contrib_pop_change.csv")
|
contrib_pop_df <- read_csv("final_data/deb_contrib_pop_change.csv")
|
||||||
|
|
||||||
median(contrib_df$age_in_days)
|
median(contrib_df$age_in_days)
|
||||||
|
|
||||||
@ -80,15 +78,22 @@ contrib_total_df$logged_contrib = log1p(contrib_total_df$after_contrib_new)
|
|||||||
contrib_total_df$logged_commits = log1p(contrib_total_df$summed_count)
|
contrib_total_df$logged_commits = log1p(contrib_total_df$summed_count)
|
||||||
#running regressions
|
#running regressions
|
||||||
library(MASS)
|
library(MASS)
|
||||||
contrib_ <- glm.nb(logged_contrib ~ t0 + t1 + t2 + t3, data = contrib_total_df)
|
contrib_ <- glm.nb(logged_contrib ~ 0 + t0 + t1 + t2 + t3, data = contrib_total_df)
|
||||||
commits_ <- glm.nb(logged_commits ~ t0 + t1 + t2 + t3, data = contrib_total_df)
|
commits_ <- glm.nb(logged_commits ~ 0 + t0 + t1 + t2 + t3, data = contrib_total_df)
|
||||||
qqnorm(residuals(lm1))
|
qqnorm(residuals(commits_))
|
||||||
summary(contrib_)
|
summary(contrib_)
|
||||||
summary(commits_)
|
summary(commits_)
|
||||||
texreg(list(contrib_, commits_), stars=NULL, digits=3, use.packages=FALSE,
|
library(texreg)
|
||||||
custom.model.names=c( 'Contributions','Commits'),
|
readme_commits_ <- readRDS('1107_topic_commitoutcome_readme.rda')
|
||||||
custom.coef.names=c('(Intercept)', 'Topic 1', 'Topic 2', 'Topic 3'),
|
texreg(list(readme_commits_, commits_), stars=NULL, digits=3, use.packages=FALSE,
|
||||||
|
custom.model.names=c( 'README','Contributing'),
|
||||||
|
custom.coef.names=c('Topic 1', 'Topic 2', 'Topic 3', 'Topic 4', 'Topic 5', 'Topic 6', 'Topic 7', 'Topic 8'),
|
||||||
table=FALSE, ci.force = TRUE)
|
table=FALSE, ci.force = TRUE)
|
||||||
|
|
||||||
saveRDS(commits_, "0731_topic_commitoutcome_contrib.rda")
|
texreg( commits_, stars=NULL, digits=3, use.packages=FALSE,
|
||||||
saveRDS(contrib_, "0731_topic_contriboutcome_contrib.rda")
|
custom.model.names=c( 'Commits'),
|
||||||
|
custom.coef.names=c('Topic 1', 'Topic 2', 'Topic 3', 'Topic 4'),
|
||||||
|
table=FALSE, ci.force = TRUE)
|
||||||
|
|
||||||
|
saveRDS(commits_, "1107_topic_commitoutcome_contrib.rda")
|
||||||
|
saveRDS(contrib_, "1107_topic_contriboutcome_contrib.rda")
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
|
|
||||||
library(stringr)
|
library(stringr)
|
||||||
library(tidyverse)
|
library(tidyverse)
|
||||||
readme_topics_df <- read_csv("../text_analysis/readme_file_topic_distributions.csv")
|
readme_topics_df <- read_csv("text_analysis/readme_file_topic_distributions.csv")
|
||||||
colMeans(subset(readme_topics_df, select = -filename))
|
colMeans(subset(readme_topics_df, select = -filename))
|
||||||
readme_df <- read_csv("../final_data/deb_readme_did.csv")
|
readme_df <- read_csv("final_data/deb_readme_did.csv")
|
||||||
readme_pop_df <- read_csv("../final_data/deb_readme_pop_change.csv")
|
readme_pop_df <- read_csv("final_data/deb_readme_pop_change.csv")
|
||||||
|
|
||||||
#get the readmeution count
|
#get the readmeution count
|
||||||
#some preprocessing and expansion
|
#some preprocessing and expansion
|
||||||
@ -76,18 +76,20 @@ readme_total_df$commit_by_contrib = readme_total_df$summed_count *readme_total_d
|
|||||||
readme_total_df$logged_outcome = log1p(readme_total_df$commit_by_contrib)
|
readme_total_df$logged_outcome = log1p(readme_total_df$commit_by_contrib)
|
||||||
readme_total_df$logged_contrib = log1p(readme_total_df$after_contrib_new)
|
readme_total_df$logged_contrib = log1p(readme_total_df$after_contrib_new)
|
||||||
readme_total_df$logged_commits = log1p(readme_total_df$summed_count)
|
readme_total_df$logged_commits = log1p(readme_total_df$summed_count)
|
||||||
|
readme_total_df$t4t5 = readme_total_df$t4 + readme_total_df$t5
|
||||||
#running regressions
|
#running regressions
|
||||||
library(MASS)
|
library(MASS)
|
||||||
lm1 <- glm.nb(logged_contrib~ t0+t1+t2+t7+t3 +t6 + t5, data = readme_total_df)
|
contrib_ <- glm.nb(logged_contrib~ 0 + t0 + t1 + t2 + t3 + t4 + t5 + t6 + t7, data = readme_total_df)
|
||||||
qqnorm(residuals(lm1))
|
commits_ <- glm.nb(logged_commits~ 0 + t0 + t1 + t2 + t3 + t4 + t5 + t6 + t7, data = readme_total_df)
|
||||||
summary(lm1)
|
qqnorm(residuals(commits_))
|
||||||
#saveRDS(lm1, "0725_topic_contriboutcome_readme.rda")
|
summary(commits_)
|
||||||
|
saveRDS(commits_, "1107_topic_commitoutcome_readme.rda")
|
||||||
contrib_ <- glm.nb(logged_contrib~ t0+t1+t2+t3+ t5 +t6 +t7, data = readme_total_df)
|
contrib_ <- glm.nb(logged_contrib~ t0+t1+t2+t3+ t5 +t6 +t7, data = readme_total_df)
|
||||||
commit_ <- glm.nb(logged_commits~ t0+t1+t2+t3+ t5 +t6 +t7, data = readme_total_df)
|
commit_ <- glm.nb(logged_commits~ t0+t1+t2+t3+ t5 +t6 +t7, data = readme_total_df)
|
||||||
|
|
||||||
library(texreg)
|
library(texreg)
|
||||||
|
|
||||||
texreg(list(contrib_, commit_), stars=NULL, digits=3, use.packages=FALSE,
|
texreg(commits_, stars=NULL, digits=3, use.packages=FALSE,
|
||||||
custom.model.names=c( 'Contributions','Commits'),
|
custom.model.names=c( 'Commits'),
|
||||||
custom.coef.names=c('(Intercept)', 'Topic 1', 'Topic 2', 'Topic 3', 'Topic 4', 'Topic 6', 'Topic 7', 'Topic 8'),
|
custom.coef.names=c( 'Topic 1', 'Topic 2', 'Topic 3', 'Topic 4', 'Topic 5', 'Topic 6', 'Topic 7', 'Topic 8'),
|
||||||
table=FALSE, ci.force = TRUE)
|
table=FALSE, ci.force = TRUE)
|
||||||
|
Loading…
Reference in New Issue
Block a user