update cron scripts with new data format

This commit is contained in:
2020-04-04 15:20:34 -07:00
parent 974dc48b12
commit c97028fabb
2 changed files with 35 additions and 11 deletions

View File

@@ -3,14 +3,25 @@
# Compute the date stamp used in every log and data file name below.
# TZ has to be exported: `date` runs as a child process and only sees
# environment variables, so a plain shell assignment would leave it using the
# machine's local time zone unless TZ happened to be exported already.
# Exporting also makes the commands started later in this script inherit UTC.
export TZ="UTC"
# OVERRIDE_DATE_STRING, when set and non-empty, replaces today's date.
date_string=${OVERRIDE_DATE_STRING:-$(date +%Y%m%d)}
# Name of the log file that collects stderr from the scraper and the
# daily-view fetchers for this date.
# NOTE(review): view_log is assigned twice in a row, so only the second value
# ("daily_views-...") is in effect for every command below. The "enwp-" name
# looks like a leftover from an earlier naming scheme -- confirm and drop it.
view_log="enwp-daily_views-${date_string}.log"
view_log="daily_views-${date_string}.log"
# Run the WikiProject scraper. Its stderr is piped through tee, which starts
# the log file afresh (no -a) and echoes the messages to this script's stdout.
./wikipedia/scripts/wikiproject_scraper.py 2> >(tee wikipedia/logs/${view_log})
# Run the page crawler against the enwiki article list, with its stderr going
# to a separate log. NOTE(review): nothing in this section moves wd_log out of
# wikipedia/logs/ -- confirm whether that is intended.
wd_log="wd-page-crawler-${date_string}.log"
python3 ./real-time-wiki-covid-tracker/PageCrawler.py -a "./wikipedia/resources/enwp_wikiproject_covid19_articles.txt" 2> >(tee wikipedia/logs/${wd_log})
# get the list of files
# Both fetch scripts are run for the same date; tee -a appends their stderr to
# the log started by the scraper above.
# NOTE(review): fetch_enwiki_daily_views.py and fetch_daily_views.py look like
# the old and new versions of the same step -- confirm both should still run.
./wikipedia/scripts/fetch_enwiki_daily_views.py -d "${date_string}" 2> >(tee -a wikipedia/logs/${view_log})
./wikipedia/scripts/fetch_daily_views.py -d "${date_string}" 2> >(tee -a wikipedia/logs/${view_log})
# Publish the combined log under the web root.
mv wikipedia/logs/${view_log} /var/www/covid19/wikipedia/logs/${view_log}
# Publish the enwiki-only TSV and JSON outputs at the top level of the web
# root. The commands are not checked, so a missing file only produces an mv
# error message and the script carries on.
mv wikipedia/data/digobs_covid19-wikipedia-enwiki_dailyviews-${date_string}.tsv /var/www/covid19/wikipedia/
# xz wikipedia/data/digobs_covid19-wikipedia-enwiki_dailyviews-${date_string}.json
mv wikipedia/data/digobs_covid19-wikipedia-enwiki_dailyviews-${date_string}.json /var/www/covid19/wikipedia/
#######################################
# Move the daily-view files for one date into a destination tree, keeping
# each file's "<dir>/<date>/<file>" path relative to the source directory.
# All matching .tsv files are handled first, then all matching .json files.
# The body runs in a subshell, so the caller's working directory and
# variables are left untouched even when something fails part-way.
# Arguments:
#   $1 - source directory holding the <dir>/<date>/ trees
#   $2 - destination root; use an absolute path, since it is resolved after
#        changing into $1
#   $3 - date string naming the <date> directory level
# Returns:
#   non-zero if $1 cannot be entered
#######################################
_publish_dailyviews() (
  cd -- "$1" || exit 1
  for src in */"$3"/*dailyviews*.tsv */"$3"/*dailyviews*.json; do
    # A pattern with no matches is left as literal text; skip it (and
    # anything else that is not a regular file) instead of reporting errors.
    [[ -f "${src}" ]] || continue
    # Create the file's parent directory, not a directory named after the
    # file itself, so that mv lands the file at exactly <dest>/<src>.
    mkdir -p -- "$2/${src%/*}" && mv -- "${src}" "$2/${src}"
  done
)

_publish_dailyviews wikipedia/data /var/www/covid19/wikipedia "${date_string}"