From 81b1b6c3ee80116fe775541558575cd53e78a59c Mon Sep 17 00:00:00 2001 From: Kaylea Champion Date: Mon, 21 Nov 2022 19:58:55 -0800 Subject: [PATCH] early version --- R_examples/EDA.Rmd | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/R_examples/EDA.Rmd b/R_examples/EDA.Rmd index 28f89d0..ec597ab 100644 --- a/R_examples/EDA.Rmd +++ b/R_examples/EDA.Rmd @@ -10,9 +10,7 @@ knitr::opts_chunk$set(echo = TRUE) library('ggplot2') library('scales') setwd("~/Research/cdsc_examples_repository/R_examples") -#load('../processed_data/artDF.RData') -load('~/Research/taboo/processed_data/EDA.RData') -#need vDF, qDF, revDF.clean +load('~/Research/cdsc_examples_repository/R_examples/data/EDA.RData') reverselog_trans <- function(base = exp(1)) { trans <- function(x) -log(x, base) @@ -209,14 +207,8 @@ hist(log(artDF$revid)) #hist(log(artDF$got_reverted)) #hist(log(artDF$revid)) -## try a wilcoxen/mann-whitney u test -###both are skewed, need to do a rank correlation. cor.test(artDF$pct_revert, as.numeric(artDF$taboo)) cor.test(artDF$revid, artDF$pct_revert) -##build the R plots of all the visualizations and distributions -## do the same type of EDA for article-level dataset -## every time there is skew think through tests/models to match -## log-log, hists and boxplots, colored by taboo, etc. #drop Barack Obama artDF <- subset(artDF, artDF$taboo!='0.5')