We're creating a fresh archive because the history for our old chapter includes API keys, data files, and other material we can't share.
57 lines
1.8 KiB
Python
57 lines
1.8 KiB
Python
from request_functions import *
|
|
import argparse
|
|
import json
|
|
import subprocess
|
|
|
|
|
|
def main():
    """Fetch abstracts for the requested eids and append them to an output file.

    Command-line arguments:
        -e / --eid  a single eid to fetch.
        -i          path to a file with one JSON object per line, each
                    containing an 'eid' key.
        -o          path of the output file; results are appended one per line.

    The run is resumable: eids already present in the output file are skipped.
    Lines of the output file that cannot be parsed, and eids for which
    get_abstract() returns a falsy result, are appended to
    'raw_data/missing_eids.json', one entry per line.

    Exits via argparse (status 2) when neither an eid nor an input file is
    given, or when no output file is given.
    """
    parser = argparse.ArgumentParser(description='Output JSON of abstracts and bibliography of all articles passed in.')
    parser.add_argument('-i', help='JSON file which includes eids')
    parser.add_argument('--eid', '-e', help='Single eid')
    parser.add_argument('-o', help='Where to append JSON results')
    args = parser.parse_args()

    if args.eid:
        eids = [args.eid]
    elif args.i:
        with open(args.i, 'r') as f:
            # Blank lines (e.g. a trailing newline) carry no eid; skip them.
            eids = [json.loads(line)['eid'] for line in f if line.strip()]
    else:
        # parser.error() prints the usage plus this message and exits, so we
        # never fall through with `eids` undefined.
        parser.error('Need to either pass in an eid or a json file with eids')

    if not args.o:
        # Without this check open(None) raises a TypeError further down.
        parser.error('Need to pass in an output file with -o')

    # If the script gets interrupted, we need to start where we left off
    errors = []
    completed_eids = set()  # set: O(1) membership checks in the fetch loop
    try:
        with open(args.o, 'r') as f:
            for line in f:
                if not line.strip():
                    continue
                try:
                    result = json.loads(line)
                    completed_eids.add(result['abstracts-retrieval-response']['coredata']['eid'])
                except (ValueError, KeyError, TypeError):
                    # Not JSON, or JSON without the expected nested eid:
                    # remember the bad line instead of aborting the run.
                    errors.append(line)
    except IOError:
        # No readable output file yet: nothing has been completed.
        pass

    print('{} completed eids'.format(len(completed_eids)))

    with open(args.o, 'a') as out_file:
        for eid in eids:
            if eid in completed_eids:
                continue
            # NOTE(review): get_abstract presumably comes from the
            # `request_functions` star import and returns a string (falsy on
            # failure) -- confirm against that module.
            result = get_abstract(eid)
            if result:
                out_file.write(result)
                out_file.write('\n')
            else:
                errors.append(eid)

    if errors:
        with open('raw_data/missing_eids.json', 'a') as missing_file:
            # Add the bad lines from the output file and the eids that failed.
            # An explicit loop is required: a bare generator expression is
            # never consumed, so nothing would be written.
            for entry in errors:
                # Bad lines keep their trailing newline while eids have none;
                # normalise so every entry ends up on its own line.
                missing_file.write(str(entry).rstrip('\r\n') + '\n')
|
|
|
|
|
|
# Script entry point: run the command-line interface only when this file is
# executed directly, not when it is imported as a module.
if __name__ == '__main__':
    main()
|