a minimal example in R that outputs a table of top 5 related search terms per day per query

This commit is contained in:
aaronshaw 2020-03-28 10:18:33 -05:00
parent 070a1623aa
commit c025a526e8
2 changed files with 34 additions and 0 deletions

View File

@ -0,0 +1,6 @@
"term","date","query.1","query.2","query.3","query.4","query.5"
"coronavirus","2020-03-27",coronavirus update,corona,coronavirus symptoms,news coronavirus,coronavirus cases
"covid-19","2020-03-27",covid-19 coronavirus,coronavirus,covid,covid-19 cases,covid 19
"covid-19 pandemic","2020-03-27",coronavirus,covid-19 coronavirus pandemic,coronavirus pandemic,who,is covid-19 a pandemic
"covid19","2020-03-27",covid,covid 19,coronavirus covid19,coronavirus,covid19 cases
"sars-cov-2","2020-03-27",coronavirus,coronavirus sars-cov-2,covid-19,covid-19 sars-cov-2,sars
1 term date query.1 query.2 query.3 query.4 query.5
2 coronavirus 2020-03-27 coronavirus update corona coronavirus symptoms news coronavirus coronavirus cases
3 covid-19 2020-03-27 covid-19 coronavirus coronavirus covid covid-19 cases covid 19
4 covid-19 pandemic 2020-03-27 coronavirus covid-19 coronavirus pandemic coronavirus pandemic who is covid-19 a pandemic
5 covid19 2020-03-27 covid covid 19 coronavirus covid19 coronavirus covid19 cases
6 sars-cov-2 2020-03-27 coronavirus coronavirus sars-cov-2 covid-19 covid-19 sars-cov-2 sars

View File

@ -0,0 +1,28 @@
### COVID-19 Digital Observatory
### 2020-03-28
###
### Minimal example analysis file using trending search data.
### Reads the related-searches CSV, keeps the first five queries listed for
### each term/date combination, and writes the result to a CSV file.

### Identify data source directory and file
DataDir <- "../data/output/"
DataFile <- "related_searches_top.csv"

### Import and cleanup data
## quote/comment.char are set explicitly: read.table's defaults also treat
## "'" as a quote character and "#" as a comment marker, which would corrupt
## free-text queries containing an apostrophe or a hash sign.
related.searches.top <- read.table(paste0(DataDir, DataFile),
                                   sep = ",", header = TRUE,
                                   quote = "\"", comment.char = "",
                                   stringsAsFactors = FALSE)

### Aggregate top 5 search queries by term/day
## head() keeps the first five rows of each group in file order, so this is
## the "top 5" only if the input is already ranked within each term/date
## (NOTE(review): confirm against the script that produces the input file).
## When every group yields five values the result holds them in a single
## matrix column, which write.table expands to query.1 ... query.5.
top5.per.term.date <- aggregate(query ~ term + date,
                                data = related.searches.top,
                                FUN = head, n = 5)

## Might cleanup a bit for further analysis or visualization...
## (was `asDate`, which is not an R function and aborted the script here)
top5.per.term.date$date <- as.Date(top5.per.term.date$date)

### Export
write.table(top5.per.term.date,
            file = "output/top5_queries_per_term_per_date.csv", sep = ",",
            row.names = FALSE)