Point at the updated data URL, add explicit NA handling to the factor, cut the unnecessary call to ggplot2, and update the corresponding output from the new data file. May not work while the kibo URLs are getting resolved.
This commit is contained in:
parent
4f8a698c62
commit
282588772e
@ -1,11 +1,11 @@
|
||||
"article","project","timestamp","views"
|
||||
"2019–20_coronavirus_pandemic","en.wikipedia","2020032600",1148284
|
||||
"2020_coronavirus_pandemic_in_India","en.wikipedia","2020032600",513901
|
||||
"Coronavirus","en.wikipedia","2020032600",397959
|
||||
"2020_coronavirus_pandemic_in_the_United_States","en.wikipedia","2020032600",337676
|
||||
"2019–20_coronavirus_pandemic_by_country_and_territory","en.wikipedia","2020032600",298603
|
||||
"2020_coronavirus_pandemic_in_Italy","en.wikipedia","2020032600",297687
|
||||
"Coronavirus_disease_2019","en.wikipedia","2020032600",292272
|
||||
"2020_coronavirus_pandemic_in_Spain","en.wikipedia","2020032600",114732
|
||||
"2020_coronavirus_pandemic_in_the_United_Kingdom","en.wikipedia","2020032600",111856
|
||||
"Anthony_Fauci","en.wikipedia","2020032600",103205
|
||||
"2019–20_coronavirus_pandemic","en.wikipedia","2020033100",831879
|
||||
"2020_coronavirus_pandemic_in_India","en.wikipedia","2020033100",323123
|
||||
"2019–20_coronavirus_pandemic_by_country_and_territory","en.wikipedia","2020033100",315572
|
||||
"2020_coronavirus_pandemic_in_the_United_States","en.wikipedia","2020033100",290535
|
||||
"Coronavirus_disease_2019","en.wikipedia","2020033100",211391
|
||||
"2020_coronavirus_pandemic_in_Italy","en.wikipedia","2020033100",209908
|
||||
"Coronavirus","en.wikipedia","2020033100",188921
|
||||
"USNS_Comfort_(T-AH-20)","en.wikipedia","2020033100",150422
|
||||
"USNS_Comfort_(T-AH-20)","en.wikipedia","2020033100",150422
|
||||
"WrestleMania_36","en.wikipedia","2020033100",137637
|
||||
|
|
@ -4,13 +4,12 @@
|
||||
### Minimal example analysis file using pageview data
|
||||
|
||||
library(tidyverse)
|
||||
library(ggplot2)
|
||||
library(scales)
|
||||
|
||||
### Import and cleanup data
|
||||
### Import and cleanup one datafile from the observatory
|
||||
|
||||
DataURL <-
|
||||
url("https://github.com/CommunityDataScienceCollective/COVID-19_Digital_Observatory/raw/master/wikipedia_views/data/dailyviews2020032600.tsv")
|
||||
url("https://covid19.communitydata.science/datasets/wikipedia/digobs_covid19-wikipedia-enwiki_dailyviews-20200401.tsv")
|
||||
|
||||
views <-
|
||||
read.table(DataURL, sep="\t", header=TRUE, stringsAsFactors=FALSE)
|
||||
@ -30,12 +29,14 @@ views <-
|
||||
### (see https://www.tidyverse.org for more info)
|
||||
|
||||
views <- views[,c("article", "project", "timestamp", "views")]
|
||||
views$timestamp <- factor(views$timestamp)
|
||||
views$timestamp <- fct_explicit_na(views$timestamp)
|
||||
|
||||
|
||||
### Sorts and groups at the same time
|
||||
views.by.proj.date <- arrange(group_by(views, project, timestamp),
|
||||
desc(views))
|
||||
|
||||
|
||||
### Export just the top 10 by pageviews
|
||||
write.table(head(views.by.proj.date, 10),
|
||||
file="output/top10_views_by_project_date.csv", sep=",",
|
||||
|
Loading…
Reference in New Issue
Block a user