add the rest of the code.
This commit is contained in:
202
ordinal_quality_models.R
Normal file
202
ordinal_quality_models.R
Normal file
@@ -0,0 +1,202 @@
|
||||
source("RemembR/R/RemembeR.R")
|
||||
source("load_data.R")
|
||||
change.remember.file("ordinal.quality.model.RDS")
|
||||
|
||||
test <- F
|
||||
|
||||
remember(weights, "sample.weights")
|
||||
|
||||
n.holdout <- 4000
|
||||
remember(n.holdout,"n.holdout")
|
||||
holdout <- df[sample(.N,n.holdout)]
|
||||
saveRDS(holdout,'data/holdout_quality_labels.RDS')
|
||||
df <- df[!(revid %in% holdout$revid)]
|
||||
saveRDS(df,'data/training_quality_labels.RDS')
|
||||
|
||||
if(test == TRUE){
|
||||
df <- df[sample(.N,2000)]
|
||||
}
|
||||
|
||||
## So it turns out that the 6 predictors we have are highly correlated creating problems for sampling so use QR decomposition
|
||||
df <- df[!is.na(wp10)]
|
||||
|
||||
df[, start.p.stub := Start + Stub]
|
||||
|
||||
saveRDS(df,"data/training_quality_labels.RDS")
|
||||
|
||||
## So it turns out that the 6 predictors we have are highly correlated creating problems for sampling so use QR decomposition
|
||||
df <- df[!is.na(wp10)]
|
||||
|
||||
df[, start.p.stub := Start + Stub]
|
||||
|
||||
fam.cloglog <- sratio(link='cloglog', threshold='flexible')
|
||||
#formula.1 <- brmsformula(wp10 | weights(weight) ~ 1,decomp='QR',center=TRUE)
|
||||
|
||||
fam <- sratio(link='logit', threshold='flexible')
|
||||
fam.cumulative <- sratio(link='logit', threshold='flexible')
|
||||
|
||||
## It turns out that the matrix is singular if we include all the predictors.
|
||||
## C is the most correlated with the other variables so for now let's remove it.
|
||||
|
||||
## it turns out that we don't need to do model selection at all since we don't care about the coefficients.
|
||||
## we can just do the csv!
|
||||
x <- df[,.(Stub,Start,C,B,GA,FA)]
|
||||
|
||||
wpca <- function(x, weight){
|
||||
name <- names(x)
|
||||
x <- as.matrix(x)
|
||||
means <- unlist(lapply(1:dim(x)[2], function(i) weighted.mean(x[,i], weight)))
|
||||
names(means) <- name
|
||||
centered <- as.matrix(t(t(x) - means))
|
||||
weightmat <- diag(weight)
|
||||
covmat <- t(centered) %*% weightmat %*% centered / (sum(weight) - 1)
|
||||
|
||||
factors <- eigen(covmat)
|
||||
basis <- factors$vectors
|
||||
result <- centered %*% basis
|
||||
# return a list with the info we need to do the transformation
|
||||
return(list(means=means, basis=basis, x=result))
|
||||
}
|
||||
|
||||
#unweighted.pca <- wpca(df[,.(Stub,Start,C,B,GA,FA)],rep(1,nrow(df)))
|
||||
upca <- prcomp(df[,.(Stub,Start,C,B,GA,FA)])
|
||||
unweighted.pca <- list(means = upca$center, basis=upca$rotation, x=upca$x)
|
||||
saveRDS(unweighted.pca,"data/ores_pca_features.noweights.RDS")
|
||||
|
||||
weighted.pca <- wpca(df[,.(Stub,Start,C,B,GA,FA)],df$article_weight)
|
||||
saveRDS(weighted.pca, "data/ores_pca_features.RDS")
|
||||
|
||||
revision.pca <- wpca(df[,.(Stub,Start,C,B,GA,FA)],df$revision_weight)
|
||||
saveRDS(revision.pca, "data/ores_pca_features_revisions.RDS")
|
||||
|
||||
df <- df[,":="(pca1 = weighted.pca$x[,1],
|
||||
pca2 = weighted.pca$x[,2],
|
||||
pca3 = weighted.pca$x[,3],
|
||||
pca4 = weighted.pca$x[,4],
|
||||
pca5 = weighted.pca$x[,5],
|
||||
pca6 = weighted.pca$x[,6])]
|
||||
|
||||
df <- df[,":="(pca1.revision = revision.pca$x[,1],
|
||||
pca2.revision = revision.pca$x[,2],
|
||||
pca3.revision = revision.pca$x[,3],
|
||||
pca4.revision = revision.pca$x[,4],
|
||||
pca5.revision = revision.pca$x[,5],
|
||||
pca6.revision = revision.pca$x[,6])]
|
||||
|
||||
df <- df[,":="(pca1.noweights = unweighted.pca$x[,1],
|
||||
pca2.noweights = unweighted.pca$x[,2],
|
||||
pca3.noweights = unweighted.pca$x[,3],
|
||||
pca4.noweights = unweighted.pca$x[,4],
|
||||
pca5.noweights = unweighted.pca$x[,5],
|
||||
pca6.noweights = unweighted.pca$x[,6])]
|
||||
|
||||
qformula.main.pca.cs <- brmsformula(wp10 | weights(article_weight) ~ cs(pca1) + cs(pca2) + cs(pca3) + cs(pca4) + cs(pca5))
|
||||
formula.main.pca.noweights.cs <- brmsformula(wp10 ~ cs(pca1.noweights) + cs(pca2.noweights) + cs(pca3.noweights) + cs(pca4.noweights) + cs(pca5.noweights))
|
||||
formula.revision.pca.cs <- brmsformula(wp10 | weights(revision_weight) ~ cs(pca1.revision) + cs(pca2.revision) + cs(pca3.revision) + cs(pca4.revision) + cs(pca5.revision))
|
||||
formula.qe6.cs <- brmsformula(wp10 | weights(article_weight) ~ cs(quality.even6))
|
||||
formula.qe6.revision.cs <- brmsformula(wp10 | weights(revision_weight) ~ cs(quality.even6))
|
||||
formula.qe6.noweights.cs <- brmsformula(wp10 ~ cs(quality.even6))
|
||||
|
||||
formula.main.pca <- brmsformula(wp10 | weights(article_weight) ~ pca1 + pca2 + pca3 + pca4 + pca5)
|
||||
formula.main.pca.noweights <- brmsformula(wp10 ~ pca1.noweights + pca2.noweights + pca3.noweights + pca4.noweights + pca5.noweights)
|
||||
formula.revision.pca <- brmsformula(wp10 | weights(revision_weight) ~ pca1.revision + pca2.revision + pca3.revision + pca4.revision + pca5.revision)
|
||||
formula.qe6 <- brmsformula(wp10 | weights(article_weight) ~ quality.even6)
|
||||
formula.qe6.revision <- brmsformula(wp10 | weights(revision_weight) ~ quality.even6)
|
||||
formula.qe6.noweights <- brmsformula(wp10 ~ quality.even6)
|
||||
|
||||
formula.scores.noweights <- brmsformula(wp10 ~ Start + Stub + GA + FA + B)
|
||||
|
||||
|
||||
library(future)
|
||||
library(parallel)
|
||||
options(mc.cores = parallel::detectCores())
|
||||
|
||||
plan(lapply(1:7,function(x) tweak(multisession, workers=4)))
|
||||
|
||||
model.main.pca %<-% brm(formula=formula.main.pca, data=df, family=fam, control=list(max_treedepth=15), future=TRUE, save_pars=save_pars(all=TRUE))
|
||||
model.qe6 %<-% brm(formula.qe6, data=df, family=fam, control=list(max_treedepth=15),future=TRUE,save_pars=save_pars(all=TRUE))
|
||||
|
||||
model.main.revision %<-% brm(formula=formula.revision.pca, data=df, family=fam, control=list(max_treedepth=15), future=TRUE, save_pars=save_pars(all=TRUE))
|
||||
|
||||
model.qe6.revision %<-% brm(formula.qe6.revision, data=df, family=fam, control=list(max_treedepth=15),future=TRUE,save_pars=save_pars(all=TRUE))
|
||||
model.qe6.noweights %<-% brm(formula.qe6.noweights, data=df, family=fam, control=list(max_treedepth=15),future=TRUE,save_pars=save_pars(all=TRUE))
|
||||
model.main.pca.noweights %<-% brm(formula=formula.main.pca.noweights, data=df, family=fam, control=list(max_treedepth=15), future=TRUE,save_pars=save_pars(all=TRUE))
|
||||
|
||||
## model.main.pca.cs %<-% brm(formula=formula.main.pca.cs, data=df, family=fam, control=list(max_treedepth=15), future=TRUE, save_pars=save_pars(all=TRUE))
|
||||
## model.qe6.cs %<-% brm(formula.qe6.cs, data=df, family=fam, control=list(max_treedepth=15),future=TRUE,save_pars=save_pars(all=TRUE))
|
||||
|
||||
## model.main.revision.cs %<-% brm(formula=formula.revision.pca.cs, data=df, family=fam, control=list(max_treedepth=15), future=TRUE, save_pars=save_pars(all=TRUE))
|
||||
|
||||
## model.qe6.revision.cs %<-% brm(formula.qe6.revision.cs, data=df, family=fam, control=list(max_treedepth=15),future=TRUE,save_pars=save_pars(all=TRUE))
|
||||
## model.qe6.noweights.cs %<-% brm(formula.qe6.noweights.cs, data=df, family=fam, control=list(max_treedepth=15),future=TRUE,save_pars=save_pars(all=TRUE))
|
||||
## model.main.pca.noweights.cs %<-% brm(formula=formula.main.pca.noweights.cs, data=df, family=fam, control=list(max_treedepth=15), future=TRUE,save_pars=save_pars(all=TRUE))
|
||||
|
||||
model.main.pca.cumulative %<-% brm(formula=formula.main.pca, data=df, family=fam.cumulative, control=list(max_treedepth=15), future=TRUE, save_pars=save_pars(all=TRUE))
|
||||
model.qe6.cumulative %<-% brm(formula.qe6, data=df, family=fam.cumulative, control=list(max_treedepth=15),future=TRUE,save_pars=save_pars(all=TRUE))
|
||||
|
||||
model.main.revision.cumulative %<-% brm(formula=formula.revision.pca, data=df, family=fam.cumulative, control=list(max_treedepth=15), future=TRUE, save_pars=save_pars(all=TRUE))
|
||||
|
||||
model.qe6.revision.cumulative %<-% brm(formula.qe6.revision, data=df, family=fam.cumulative, control=list(max_treedepth=15),future=TRUE,save_pars=save_pars(all=TRUE))
|
||||
model.qe6.noweights.cumulative %<-% brm(formula.qe6.noweights, data=df, family=fam.cumulative, control=list(max_treedepth=15),future=TRUE,save_pars=save_pars(all=TRUE))
|
||||
model.main.pca.noweights.cumulative %<-% brm(formula=formula.main.pca.noweights, data=df, family=fam.cumulative, control=list(max_treedepth=15), future=TRUE,save_pars=save_pars(all=TRUE))
|
||||
|
||||
|
||||
#model.scores.noweights <- brm(formula=formula.scores.noweights, data=df, family=fam, control=list(max_treedepth=15), future=TRUE,save_pars=save_pars(all=TRUE))
|
||||
|
||||
models <- resolve(globalenv(),result=F)
|
||||
print("adding loo")
|
||||
|
||||
model.main.revision <- add_criterion(model.main.revision,'loo',moment_match=T)
|
||||
model.main.pca <- add_criterion(model.main.pca,'loo',moment_match=T)
|
||||
model.qe6.revision <- add_criterion(model.qe6.revision,'loo')
|
||||
model.qe6 <- add_criterion(model.qe6,'loo')
|
||||
model.main.pca.noweights <- add_criterion(model.main.pca.noweights,'loo',moment_match=T)
|
||||
model.qe6.noweights <- add_criterion(model.qe6.noweights,'loo')
|
||||
|
||||
model.main.revision.cumulative <- add_criterion(model.main.revision.cumulative,'loo',moment_match=T)
|
||||
model.main.pca.cumulative <- add_criterion(model.main.pca.cumulative,'loo',moment_match=T)
|
||||
model.qe6.revision.cumulative <- add_criterion(model.qe6.revision.cumulative,'loo')
|
||||
model.qe6.cumulative <- add_criterion(model.qe6.cumulative,'loo')
|
||||
model.main.pca.noweights.cumulative <- add_criterion(model.main.pca.noweights.cumulative,'loo',moment_match=T)
|
||||
model.qe6.noweights.cumulative <- add_criterion(model.qe6.noweights.cumulative,'loo')
|
||||
|
||||
|
||||
## model.main.revision.cs <- add_criterion(model.main.revision.cs,'loo',moment_match=T)
|
||||
## model.main.pca.cs <- add_criterion(model.main.pca.cs,'loo',moment_match=T)
|
||||
## model.qe6.revision.cs <- add_criterion(model.qe6.revision.cs,'loo')
|
||||
## model.qe6.cs <- add_criterion(model.qe6.cs,'loo')
|
||||
## model.main.pca.noweights.cs <- add_criterion(model.main.pca.noweights.cs,'loo',moment_match=T)
|
||||
## model.qe6.noweights.cs <- add_criterion(model.qe6.noweights.cs,'loo')
|
||||
|
||||
saveRDS(model.qe6.revision,"models/ordinal_quality_qe6_revision.RDS")
|
||||
saveRDS(model.qe6,"models/ordinal_quality_qe6.RDS")
|
||||
saveRDS(model.main.pca.noweights,"models/ordinal_quality_pca.noweights.RDS")
|
||||
saveRDS(model.qe6.noweights,"models/ordinal_quality_qe6.noweights.RDS")
|
||||
saveRDS(model.main.pca,"models/ordinal_quality_pca.RDS")
|
||||
saveRDS(model.main.revision,"models/ordinal_quality_pca_revision.RDS")
|
||||
|
||||
saveRDS(model.qe6.revision.cumulative,"models/ordinal_quality_qe6_revision.cumulative.RDS")
|
||||
saveRDS(model.qe6.cumulative,"models/ordinal_quality_qe6.cumulative.RDS")
|
||||
saveRDS(model.main.pca.noweights.cumulative,"models/ordinal_quality_pca.noweights.cumulative.RDS")
|
||||
saveRDS(model.qe6.noweights.cumulative,"models/ordinal_quality_qe6.noweights.cumulative.RDS")
|
||||
saveRDS(model.main.pca.cumulative,"models/ordinal_quality_pca.cumulative.RDS")
|
||||
saveRDS(model.main.revision.cumulative,"models/ordinal_quality_pca_revision.cumulative.RDS")
|
||||
|
||||
## saveRDS(model.qe6.revision.cs,"models/ordinal_quality_qe6_revision.RDS")
|
||||
## saveRDS(model.qe6.cs,"models/ordinal_quality_qe6.RDS")
|
||||
## saveRDS(model.main.pca.noweights.cs,"models/ordinal_quality_pca.noweights.RDS")
|
||||
## saveRDS(model.qe6.noweights.cs,"models/ordinal_quality_qe6.noweights.RDS")
|
||||
## saveRDS(model.main.pca.cs,"models/ordinal_quality_pca.RDS")
|
||||
## saveRDS(model.main.revision.cs,"models/ordinal_quality_pca_revision.RDS")
|
||||
|
||||
|
||||
|
||||
models <- resolve(globalenv(),result=F)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user