Merge pull request #17 from makoshark/master

changes to support historical view data
This commit is contained in:
Kaylea Champion 2020-04-02 14:18:05 -07:00 committed by GitHub
commit 152704df7c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 10 additions and 9 deletions

View File

@ -3,10 +3,10 @@
TZ="UTC" TZ="UTC"
date_string=$(date +%Y%m%d) date_string=$(date +%Y%m%d)
./wikipedia/scripts/wikiproject_scraper.py 2> >(tee wikipedia/logs/enwp-wikiproject_scraper-${date_string}.log)
revs_log="enwp-revisions-${date_string}.log" revs_log="enwp-revisions-${date_string}.log"
./wikipedia/scripts/fetch_enwiki_revisions.py 2> >(tee wikipedia/logs/${rev_log}) ./wikipedia/scripts/wikiproject_scraper.py 2> >(tee wikipedia/logs/${revs_log})
./wikipedia/scripts/fetch_enwiki_revisions.py 2> >(tee -a wikipedia/logs/${revs_log})
mv wikipedia/logs/${revs_log} /var/www/covid19/wikipedia/logs/ mv wikipedia/logs/${revs_log} /var/www/covid19/wikipedia/logs/
revs_tsv="digobs_covid19-wikipedia-enwiki_revisions-${date_string}.tsv" revs_tsv="digobs_covid19-wikipedia-enwiki_revisions-${date_string}.tsv"

View File

@ -1,13 +1,13 @@
#!/bin/bash -x #!/bin/bash -x
TZ="UTC" TZ="UTC"
date_string=$(date +%Y%m%d) date_string=${OVERRIDE_DATE_STRING:-$(date +%Y%m%d)}
./wikipedia/scripts/wikiproject_scraper.py 2> >(tee wikipedia/logs/enwp-wikiproject_scraper-${date_string}.log) view_log="enwp-daily_views-${date_string}.log"
./wikipedia/scripts/wikiproject_scraper.py 2> >(tee wikipedia/logs/${view_log})
# get the list of files # get the list of files
view_log="enwp-daily_views-${date_string}.log" ./wikipedia/scripts/fetch_enwiki_daily_views.py -d "${date_string}" 2> >(tee -a wikipedia/logs/${view_log})
./wikipedia/scripts/fetch_enwiki_daily_views.py 2> >(tee wikipedia/logs/${view_log})
mv wikipedia/logs/${view_log} /var/www/covid19/wikipedia/logs/${view_log} mv wikipedia/logs/${view_log} /var/www/covid19/wikipedia/logs/${view_log}
mv wikipedia/data/digobs_covid19-wikipedia-enwiki_dailyviews-${date_string}.tsv /var/www/covid19/wikipedia/ mv wikipedia/data/digobs_covid19-wikipedia-enwiki_dailyviews-${date_string}.tsv /var/www/covid19/wikipedia/

View File

@ -58,8 +58,8 @@ def main():
logging.info(f"Last commit: {digobs.git_hash()}") logging.info(f"Last commit: {digobs.git_hash()}")
#1 Load up the list of article names #1 Load up the list of article names
j_outfilename = os.path.join(outputPath, f"digobs_covid19-wikipedia-enwiki_dailyviews-{export_date}.json") j_outfilename = os.path.join(outputPath, f"digobs_covid19-wikipedia-enwiki_dailyviews-{query_date}.json")
t_outfilename = os.path.join(outputPath, f"digobs_covid19-wikipedia-enwiki_dailyviews-{export_date}.tsv") t_outfilename = os.path.join(outputPath, f"digobs_covid19-wikipedia-enwiki_dailyviews-{query_date}.tsv")
with open(articleFile, 'r') as infile: with open(articleFile, 'r') as infile:
articleList = list(map(str.strip, infile)) articleList = list(map(str.strip, infile))
@ -82,6 +82,7 @@ def main():
else: else:
failure = failure + 1 failure = failure + 1
logging.warning(f"Failure: {response.status_code} from {url}") logging.warning(f"Failure: {response.status_code} from {url}")
continue
# start writing the CSV File if it doesn't exist yet # start writing the CSV File if it doesn't exist yet
try: try: