2020-04-01 20:15:11 +00:00
|
|
|
#!/bin/bash -x
# Wikipedia daily-views job: runs the WikiProject scraper, the page crawler and
# the daily-views fetcher (stderr of each is tee'd into a dated log under
# wikipedia/logs), then moves the views log and the day's dailyviews .tsv/.json
# files under /var/www/covid19/wikipedia.
# OVERRIDE_DATE_STRING may be set to replace the default date stamp.
# The -x flag makes bash trace every command as it is executed.
|
|
|
|
|
|
|
|
# Export TZ so child processes (notably the date call that builds the date
# stamp) really run in UTC. A plain assignment is only visible to this shell
# unless the caller had already exported TZ.
export TZ="UTC"
|
2020-04-02 18:28:34 +00:00
|
|
|
# Date stamp (YYYYMMDD) used in log names and data paths.
# A non-empty OVERRIDE_DATE_STRING takes precedence over today's date.
if [[ -n "${OVERRIDE_DATE_STRING:-}" ]]; then
  date_string="${OVERRIDE_DATE_STRING}"
else
  date_string="$(date +%Y%m%d)"
fi
|
2020-04-01 20:15:11 +00:00
|
|
|
|
2020-04-04 22:20:34 +00:00
|
|
|
# Name of the dated log file shared by the scraper and daily-views steps.
printf -v view_log 'daily_views-%s.log' "${date_string}"
|
2020-04-02 18:28:34 +00:00
|
|
|
# Run the WikiProject scraper. Its stderr goes through tee, which starts a
# fresh log file and also echoes the messages to this script's stdout.
# The log path is quoted so an unusual date stamp cannot split or glob it.
./wikipedia/scripts/wikiproject_scraper.py 2> >(tee "wikipedia/logs/${view_log}")
|
2020-04-01 20:15:11 +00:00
|
|
|
|
2020-04-04 22:20:34 +00:00
|
|
|
# Name of the dated log file for the page-crawler step.
printf -v wd_log 'wd-page-crawler-%s.log' "${date_string}"
|
|
|
|
# Run the page crawler against the article list. Its stderr is tee'd into a
# fresh crawler log and echoed to this script's stdout.
# The log path is quoted so an unusual date stamp cannot split or glob it.
python3 ./real-time-wiki-covid-tracker/PageCrawler.py \
  -a "./wikipedia/resources/enwp_wikiproject_covid19_articles.txt" \
  2> >(tee "wikipedia/logs/${wd_log}")
|
|
|
|
|
2020-04-01 20:15:11 +00:00
|
|
|
# get the list of files
|
2020-04-04 22:20:34 +00:00
|
|
|
# Fetch the daily views for the chosen date. Its stderr is appended (tee -a)
# to the views log instead of truncating it, and echoed to stdout.
# The log path is quoted so an unusual date stamp cannot split or glob it.
./wikipedia/scripts/fetch_daily_views.py -d "${date_string}" \
  2> >(tee -a "wikipedia/logs/${view_log}")
|
2020-04-01 20:49:02 +00:00
|
|
|
# Move the finished views log under the web root. Paths are quoted and
# preceded by "--" so odd characters in the date stamp cannot split the
# arguments or be taken as options.
mv -- "wikipedia/logs/${view_log}" "/var/www/covid19/wikipedia/logs/${view_log}"
|
2020-04-01 20:15:11 +00:00
|
|
|
|
2020-04-04 22:20:34 +00:00
|
|
|
# The dailyviews globs that follow are relative to wikipedia/data. Abort if
# the directory cannot be entered rather than moving files from the wrong
# location (and later climbing two levels above the starting directory).
cd wikipedia/data || { printf 'cannot cd to wikipedia/data\n' >&2; exit 1; }
|
|
|
|
# Publish the day's dailyviews exports (TSV first, then JSON) under the web root.
#
# publish_dailyviews EXT [DEST_ROOT]
#   EXT        file extension to match: */${date_string}/*dailyviews*.EXT
#   DEST_ROOT  destination root; defaults to /var/www/covid19/wikipedia
# Reads the global date_string and matches relative to the current directory.
# Prints a notice on stderr when nothing matched.
#
# NOTE(review): mkdir -p is given the full file path, so each file ends up at
# DEST_ROOT/<path>/<filename>, i.e. inside a directory named after itself.
# That layout is kept as-is because published paths may depend on it; if the
# parent directory was intended, change mkdir to "${dest_root}/${path%/*}".
publish_dailyviews() {
  local ext=$1
  local dest_root=${2:-/var/www/covid19/wikipedia}
  local path
  local moved=0

  for path in */"${date_string}"/*dailyviews*."${ext}"; do
    # An unmatched glob stays literal; skip anything that is not a real file.
    [[ -f "${path}" ]] || continue
    mkdir -p -- "${dest_root}/${path}"
    mv -- "${path}" "${dest_root}/${path}"
    moved=1
  done

  if (( moved == 0 )); then
    printf 'no dailyviews .%s files found for %s\n' "${ext}" "${date_string}" >&2
  fi
}

publish_dailyviews tsv
publish_dailyviews json
|
2020-04-01 20:15:11 +00:00
|
|
|
|
2020-04-04 22:20:34 +00:00
|
|
|
# Climb back out of wikipedia/data to the directory the script started in;
# stop if that fails so nothing afterwards runs from the wrong place.
cd ../.. || exit 1
|