update cron scripts with new data format
This commit is contained in:
parent
974dc48b12
commit
c97028fabb
@ -6,12 +6,25 @@ date_string=$(date +%Y%m%d)
|
|||||||
revs_log="enwp-revisions-${date_string}.log"
|
revs_log="enwp-revisions-${date_string}.log"
|
||||||
./wikipedia/scripts/wikiproject_scraper.py 2> >(tee wikipedia/logs/${revs_log})
|
./wikipedia/scripts/wikiproject_scraper.py 2> >(tee wikipedia/logs/${revs_log})
|
||||||
|
|
||||||
./wikipedia/scripts/fetch_enwiki_revisions.py 2> >(tee -a wikipedia/logs/${revs_log})
|
wd_log="wd-page-crawler-${date_string}.log"
|
||||||
|
python3 ./real-time-wiki-covid-tracker/PageCrawler.py -a "./wikipedia/resources/enwp_wikiproject_covid19_articles.txt" 2> >(tee wikipedia/logs/${wd_log})
|
||||||
|
|
||||||
|
./wikipedia/scripts/fetch_revisions.py 2> >(tee -a wikipedia/logs/${revs_log})
|
||||||
mv wikipedia/logs/${revs_log} /var/www/covid19/wikipedia/logs/
|
mv wikipedia/logs/${revs_log} /var/www/covid19/wikipedia/logs/
|
||||||
|
|
||||||
revs_tsv="digobs_covid19-wikipedia-enwiki_revisions-${date_string}.tsv"
|
python3 ./wikipedia/scripts/copy_revisions_data.py ${date_string}
|
||||||
mv wikipedia/data/${revs_tsv} /var/www/covid19/wikipedia
|
|
||||||
|
|
||||||
revs_json="digobs_covid19-wikipedia-enwiki_revisions-${date_string}.json"
|
cd wikipedia/data
|
||||||
xz wikipedia/data/${revs_json}
|
xz */${date_string}/*revisions*.json
|
||||||
mv wikipedia/data/${revs_json}.xz /var/www/covid19/wikipedia
|
|
||||||
|
# Publish this run's compressed revision files. The relative paths produced
# by the glob are recreated under the web root so the directory layout is
# preserved.
publish_root=/var/www/covid19/wikipedia
find */"${date_string}"/*revisions*.xz | while IFS= read -r line; do
  # Create only the parent directory: running mkdir -p on the full path
  # would make a directory carrying the file's name. mkdir and mv share one
  # root so they cannot point at different trees.
  mkdir -p -- "${publish_root}/${line%/*}"
  mv -- "$line" "${publish_root}/${line}"
done
|
||||||
|
|
||||||
|
# Publish this run's revision TSV files. The relative paths produced by the
# glob are recreated under the web root so the directory layout is preserved.
publish_root=/var/www/covid19/wikipedia
find */"${date_string}"/*revisions*.tsv | while IFS= read -r line; do
  # Create only the parent directory: running mkdir -p on the full path
  # would make a directory carrying the file's name.
  mkdir -p -- "${publish_root}/${line%/*}"
  mv -- "$line" "${publish_root}/${line}"
done
|
||||||
|
|
||||||
|
cd ../..
|
||||||
|
@ -3,14 +3,25 @@
|
|||||||
TZ="UTC"
|
TZ="UTC"
|
||||||
date_string=${OVERRIDE_DATE_STRING:-$(date +%Y%m%d)}
|
date_string=${OVERRIDE_DATE_STRING:-$(date +%Y%m%d)}
|
||||||
|
|
||||||
view_log="enwp-daily_views-${date_string}.log"
|
view_log="daily_views-${date_string}.log"
|
||||||
./wikipedia/scripts/wikiproject_scraper.py 2> >(tee wikipedia/logs/${view_log})
|
./wikipedia/scripts/wikiproject_scraper.py 2> >(tee wikipedia/logs/${view_log})
|
||||||
|
|
||||||
|
wd_log="wd-page-crawler-${date_string}.log"
|
||||||
|
python3 ./real-time-wiki-covid-tracker/PageCrawler.py -a "./wikipedia/resources/enwp_wikiproject_covid19_articles.txt" 2> >(tee wikipedia/logs/${wd_log})
|
||||||
|
|
||||||
# get the list of files
|
# get the list of files
|
||||||
./wikipedia/scripts/fetch_enwiki_daily_views.py -d "${date_string}" 2> >(tee -a wikipedia/logs/${view_log})
|
./wikipedia/scripts/fetch_daily_views.py -d "${date_string}" 2> >(tee -a wikipedia/logs/${view_log})
|
||||||
mv wikipedia/logs/${view_log} /var/www/covid19/wikipedia/logs/${view_log}
|
mv wikipedia/logs/${view_log} /var/www/covid19/wikipedia/logs/${view_log}
|
||||||
mv wikipedia/data/digobs_covid19-wikipedia-enwiki_dailyviews-${date_string}.tsv /var/www/covid19/wikipedia/
|
|
||||||
|
|
||||||
# xz wikipedia/data/digobs_covid19-wikipedia-enwiki_dailyviews-${date_string}.json
|
cd wikipedia/data
|
||||||
mv wikipedia/data/digobs_covid19-wikipedia-enwiki_dailyviews-${date_string}.json /var/www/covid19/wikipedia/
|
# Publish this run's daily-view TSV files. The relative paths produced by the
# glob are recreated under the web root so the directory layout is preserved.
publish_root=/var/www/covid19/wikipedia
find */"${date_string}"/*dailyviews*.tsv | while IFS= read -r line; do
  # Create only the parent directory: running mkdir -p on the full path
  # would make a directory carrying the file's name.
  mkdir -p -- "${publish_root}/${line%/*}"
  mv -- "$line" "${publish_root}/${line}"
done
|
||||||
|
|
||||||
|
# Publish this run's daily-view JSON files. The relative paths produced by the
# glob are recreated under the web root so the directory layout is preserved.
publish_root=/var/www/covid19/wikipedia
find */"${date_string}"/*dailyviews*.json | while IFS= read -r line; do
  # Create only the parent directory: running mkdir -p on the full path
  # would make a directory carrying the file's name.
  mkdir -p -- "${publish_root}/${line%/*}"
  mv -- "$line" "${publish_root}/${line}"
done
|
||||||
|
|
||||||
|
cd ../..
|
||||||
|
Loading…
Reference in New Issue
Block a user