17
0

early version

This commit is contained in:
Kaylea Champion 2022-11-21 19:58:55 -08:00
parent aefbee45ba
commit 81b1b6c3ee

View File

@ -10,9 +10,7 @@ knitr::opts_chunk$set(echo = TRUE)
# Attach the packages used by this analysis.
library('ggplot2')
library('scales')
# NOTE(review): hard-coded, user-specific working directory; this only
# resolves on a machine that has this exact path under the home directory.
setwd("~/Research/cdsc_examples_repository/R_examples")
#load('../processed_data/artDF.RData')
# load() restores the saved objects into the current environment.
# NOTE(review): two EDA.RData files are loaded back to back; any object with
# the same name in both is overwritten by the second load -- confirm that
# both loads are intended.
load('~/Research/taboo/processed_data/EDA.RData')
# need vDF, qDF, revDF.clean
load('~/Research/cdsc_examples_repository/R_examples/data/EDA.RData')
reverselog_trans <- function(base = exp(1)) {
trans <- function(x) -log(x, base)
@ -209,14 +207,8 @@ hist(log(artDF$revid))
#hist(log(artDF$got_reverted))
#hist(log(artDF$revid))
## TODO: try a Wilcoxon / Mann-Whitney U test
### Both variables are skewed, so a rank correlation is needed.
# NOTE(review): cor.test() defaults to method = "pearson", so the two calls
# below are not rank correlations; consider method = "spearman" or "kendall"
# if the note above still applies.
# NOTE(review): if artDF$taboo is a factor, as.numeric() returns the integer
# level codes rather than the labelled values -- confirm the column type.
cor.test(artDF$pct_revert, as.numeric(artDF$taboo))
cor.test(artDF$revid, artDF$pct_revert)
## TODO: build the R plots of all the visualizations and distributions.
## TODO: do the same type of EDA for the article-level dataset.
## Every time there is skew, think through tests/models to match:
## log-log, histograms and boxplots, colored by taboo, etc.
# Drop Barack Obama: keep only the rows whose taboo value is not '0.5'
# (presumably that article is the one coded 0.5 -- confirm). subset() also
# drops rows where the comparison evaluates to NA.
artDF <- subset(artDF, artDF$taboo!='0.5')