18
0

a few more tweaks

This commit is contained in:
Kaylea Champion 2022-11-21 20:23:25 -08:00
parent 81b1b6c3ee
commit ea7db24964

View File

@ -196,18 +196,13 @@ This examines the same data but with a relative ranking approach for quality. Ea
```{r H3descriptives}
##revision quality
# Share of each article's revisions that were reverted, as a percentage
# (revid is plotted below as 'Number of Revisions').
artDF$pct_revert <- (artDF$got_reverted / artDF$revid) * 100
# Frequency tables for the two grouping columns.
table(artDF$taboo)
table(artDF$source)
# Raw and log-scale distributions of the revert and revision counts.
# NOTE(review): log(0) is -Inf and hist() keeps only finite values, so any
# zero counts would be left out of the log-scale histograms -- confirm that
# is intended.
hist(artDF$got_reverted)
hist(log(artDF$got_reverted))
hist(artDF$revid)
hist(log(artDF$revid))
## facet these hists by source!!
#cor.test(artDF$got_reverted, as.numeric(artDF$taboo))
#cor.test(artDF$revid, as.numeric(artDF$taboo))
#hist(log(artDF$got_reverted))
#hist(log(artDF$revid))
# Correlation tests (cor.test() defaults) of percent reverted against each
# grouping column and against the revision count.
# NOTE(review): as.numeric() returns level codes for a factor and NA for
# non-numeric strings -- confirm the types of taboo and source make these
# two tests meaningful.
cor.test(artDF$pct_revert, as.numeric(artDF$taboo))
cor.test(artDF$pct_revert, as.numeric(artDF$source))
cor.test(artDF$revid, artDF$pct_revert)
#drop Barack Obama
@ -215,16 +210,16 @@ artDF <- subset(artDF, artDF$taboo!='0.5')
# Re-tabulate taboo, then recode it as a logical flag.
# NOTE(review): as.logical() maps numeric 0/1 to FALSE/TRUE but yields NA for
# the strings "0"/"1" -- confirm taboo is numeric at this point.
table(artDF$taboo)
artDF$taboo <- as.logical(artDF$taboo)
# Scatter plot of percent reverted against number of revisions.
# NOTE(review): the next two lines are two versions of the same statement that
# differ only in the colour aesthetic (taboo vs source). The first ends in `+`
# and runs straight into the second assignment; only one should be kept.
g <- ggplot(artDF, aes(x=revid, y=pct_revert, color=taboo)) +
g <- ggplot(artDF, aes(x=revid, y=pct_revert, color=source)) +
geom_point() +
labs(x='Number of Revisions', y='Percent Reverted')
g
# Box plot of the revert counts, coloured by group.
# NOTE(review): same pair of taboo/source variants as above.
ggplot(artDF, aes(x=got_reverted, color=taboo)) +
ggplot(artDF, aes(x=got_reverted, color=source)) +
geom_boxplot()
# Revert count against revision count: points, smoother and marginal rug,
# grouped by taboo. The statement continues past the last line shown here.
# NOTE(review): same pair of taboo/source variants as above.
g <- ggplot(artDF, aes(group=as.factor(taboo), x=revid, y=got_reverted, color=taboo)) +
g <- ggplot(artDF, aes(group=as.factor(taboo), x=revid, y=got_reverted, color=source)) +
geom_point(alpha=.2) +
geom_smooth() +
geom_rug(alpha=.2)+
@ -234,7 +229,7 @@ g <- ggplot(artDF, aes(group=as.factor(taboo), x=revid, y=got_reverted, color=ta
# Print the plot built by the preceding statement.
g
# Percent reverted against revision count: points, smoother and marginal rug,
# grouped by taboo. The statement continues past the last line shown here.
# NOTE(review): the next two lines are two versions of the same statement that
# differ only in the colour aesthetic (taboo vs source); only one should be
# kept.
g <- ggplot(artDF, aes(group=as.factor(taboo), x=revid, y=pct_revert, color=taboo)) +
g <- ggplot(artDF, aes(group=as.factor(taboo), x=revid, y=pct_revert, color=source)) +
geom_point(alpha=.2) +
geom_smooth() +
geom_rug(alpha=.2)+
@ -281,7 +276,7 @@ xTabAnon <- xtabs(~source+revert+anon,data=revDF.clean.norev)
# Flattened view of the xTabAnon cross-tabulation, first as counts and then
# as proportions within each row of the flattened table (margin 1).
ftable(xTabAnon)
prop.table(ftable(xTabAnon), 1)
# Box plot of anon, coloured by group.
# NOTE(review): the next two lines are two versions of the same statement that
# differ only in the colour aesthetic (taboo vs source); only one should be
# kept.
ggplot(revDF.clean.norev, aes(x=anon, color=taboo)) +
ggplot(revDF.clean.norev, aes(x=anon, color=source)) +
geom_boxplot()
```
@ -326,13 +321,13 @@ g
```{r}
# Five-number summary (plus mean) of the user-page text length column.
summary(revDF.clean$userpage_text_chars)
# Histogram of log1p(userpage_text_chars), one facet row per source with
# independent y scales.
# NOTE(review): the next two lines are two versions of the same statement that
# differ only in the group aesthetic (taboo vs source); only one should be
# kept.
g <- ggplot(revDF.clean, aes(x=log1p(userpage_text_chars), group=taboo)) +
g <- ggplot(revDF.clean, aes(x=log1p(userpage_text_chars), group=source)) +
geom_histogram(binwidth = .5) +
facet_grid(source~., scales='free_y')
g
# Same histogram restricted to rows with userpage_text_chars below exp(4)
# (about 55).
# NOTE(review): same pair of taboo/source variants as above.
g <- ggplot(subset(revDF.clean, revDF.clean$userpage_text_chars < exp(4)), aes(x=log1p(userpage_text_chars), group=taboo)) +
g <- ggplot(subset(revDF.clean, revDF.clean$userpage_text_chars < exp(4)), aes(x=log1p(userpage_text_chars), group=source)) +
geom_histogram(binwidth = .5) +
facet_grid(source~., scales='free_y')
@ -380,8 +375,6 @@ prop.table(table(gaveGenderDF$source, gaveGenderDF$gender), margin = 1)
# Counts of the emailable flag in the revision-level and user-level tables.
table(revDF.clean$emailable)
table(userDF$emailable)
# Proportion of each emailable value within each source (rows sum to 1).
prop.table(table(revDF.clean$source, revDF.clean$emailable), margin = 1)
```
@ -390,19 +383,16 @@ prop.table(table(revDF.clean$source, revDF.clean$emailable), margin = 1)
```{r protection}
# Keep rows with a non-zero protection proportion, then drop the rows whose
# taboo value is 0.5.
artDF.prot.only <- subset(artDF.prot, artDF.prot$pct.prot > 0)
artDF.prot.only <- subset(artDF.prot.only, artDF.prot.only$taboo != 0.5)
# Box plot of the protection proportion by group.
# NOTE(review): the next two lines are two versions of the same statement; they
# differ in both the data frame (artDF.prot.only vs artDF) and the grouping
# column (taboo vs source). Only one should be kept -- confirm which data
# frame is intended and that it carries a pct.prot column.
g <- ggplot(artDF.prot.only, aes(x=pct.prot, group=taboo)) +
g <- ggplot(artDF, aes(x=pct.prot, group=source)) +
geom_boxplot() +
labs(x='Protection Proportion')
g
# Non-zero subset for the second plot.
# NOTE(review): artDF.prot.only was already filtered to pct.prot > 0 above, so
# the t.artDF filter is a no-op; the artDF.prot line overwrites the data frame
# the first statement of this chunk reads from. These look like two versions
# of one step -- keep only one.
t.artDF <- subset(artDF.prot.only, artDF.prot.only$pct.prot > 0)
artDF.prot <- subset(artDF, artDF$pct.prot > 0)
# Box plot of the protection proportion for non-zero rows only.
# NOTE(review): same pair of old/new variants as the first plot in this chunk.
g <- ggplot(t.artDF, aes(x=pct.prot, group=taboo)) +
g <- ggplot(artDF.prot, aes(x=pct.prot, group=source)) +
geom_boxplot() +
labs(x='Protection Proportion (non-zero only)')