diff --git a/R_examples/EDA.Rmd b/R_examples/EDA.Rmd index 28f89d0..ec597ab 100644 --- a/R_examples/EDA.Rmd +++ b/R_examples/EDA.Rmd @@ -10,9 +10,7 @@ knitr::opts_chunk$set(echo = TRUE) library('ggplot2') library('scales') setwd("~/Research/cdsc_examples_repository/R_examples") -#load('../processed_data/artDF.RData') -load('~/Research/taboo/processed_data/EDA.RData') -#need vDF, qDF, revDF.clean +load('~/Research/cdsc_examples_repository/R_examples/data/EDA.RData') reverselog_trans <- function(base = exp(1)) { trans <- function(x) -log(x, base) @@ -209,14 +207,8 @@ hist(log(artDF$revid)) #hist(log(artDF$got_reverted)) #hist(log(artDF$revid)) -## try a wilcoxen/mann-whitney u test -###both are skewed, need to do a rank correlation. cor.test(artDF$pct_revert, as.numeric(artDF$taboo)) cor.test(artDF$revid, artDF$pct_revert) -##build the R plots of all the visualizations and distributions -## do the same type of EDA for article-level dataset -## every time there is skew think through tests/models to match -## log-log, hists and boxplots, colored by taboo, etc. #drop Barack Obama artDF <- subset(artDF, artDF$taboo!='0.5')