early version
This commit is contained in:
parent
aefbee45ba
commit
81b1b6c3ee
@ -10,9 +10,7 @@ knitr::opts_chunk$set(echo = TRUE)
|
||||
library('ggplot2')
|
||||
library('scales')
|
||||
# NOTE(review): hard-coded, user-specific working directory; this only resolves
# on a machine with this exact home-directory layout. Confirm it is still needed.
setwd("~/Research/cdsc_examples_repository/R_examples")
|
||||
#load('../processed_data/artDF.RData')
|
||||
# Restores the objects saved in the taboo project's EDA.RData into the session.
# NOTE(review): another EDA.RData is loaded further down from a different path;
# confirm which of the two is meant to be live.
load('~/Research/taboo/processed_data/EDA.RData')
|
||||
#need vDF, qDF, revDF.clean
|
||||
# Restores the saved EDA objects from the examples repository's data directory.
# Presumably this is what supplies vDF, qDF, and revDF.clean (TODO confirm).
load('~/Research/cdsc_examples_repository/R_examples/data/EDA.RData')
|
||||
|
||||
reverselog_trans <- function(base = exp(1)) {
|
||||
trans <- function(x) -log(x, base)
|
||||
@ -209,14 +207,8 @@ hist(log(artDF$revid))
|
||||
#hist(log(artDF$got_reverted))
|
||||
#hist(log(artDF$revid))
|
||||
|
||||
## try a Wilcoxon/Mann-Whitney U test
|
||||
###both are skewed, need to do a rank correlation.
|
||||
# Rank (Spearman) correlation between revert share and taboo status.
# cor.test() defaults to Pearson, which is not appropriate for skewed inputs;
# exact = FALSE uses the asymptotic p-value, avoiding the ties warning.
# NOTE(review): if taboo is a factor, as.numeric() yields level codes, not the
# printed labels. Confirm the coding is ordered as intended.
cor.test(
  artDF$pct_revert, as.numeric(artDF$taboo),
  method = "spearman", exact = FALSE
)
|
||||
# Rank (Spearman) correlation between revid and revert share.
# cor.test() defaults to Pearson, which is not appropriate for skewed inputs;
# exact = FALSE uses the asymptotic p-value, avoiding the ties warning.
cor.test(
  artDF$revid, artDF$pct_revert,
  method = "spearman", exact = FALSE
)
|
||||
##build the R plots of all the visualizations and distributions
|
||||
## do the same type of EDA for article-level dataset
|
||||
## every time there is skew think through tests/models to match
|
||||
## log-log, hists and boxplots, colored by taboo, etc.
|
||||
|
||||
#drop Barack Obama
|
||||
# Keep only the rows whose taboo value is not "0.5"; subset() also discards
# rows where that comparison evaluates to NA.
artDF <- subset(artDF, subset = artDF$taboo != "0.5")
|
||||
|
Loading…
Reference in New Issue
Block a user