We're creating a fresh archive because the history for our old chapter includes API keys, data files, and other material we can't share.
51 lines
1.4 KiB
Python
51 lines
1.4 KiB
Python
import json
|
|
import argparse
|
|
import csv
|
|
|
|
def main():
    """Convert a file of abstract JSON lines into a paper-to-subject TSV.

    Parses the command line, reads the file given by ``-i`` one line at a
    time, passes each line to ``get_entries`` and writes the rows it returns
    to the file given by ``-o``. The output starts with the header row
    ``paper_eid``, ``subject``, ``subject_code`` and is tab-delimited.
    """
    parser = argparse.ArgumentParser(description='Generate paper to subject mapping file from abstracts file')
    # Both paths go straight to open(); marking them required turns a
    # forgotten flag into a usage error instead of a TypeError from open(None).
    parser.add_argument('-i', required=True, help='Abstract file')
    parser.add_argument('-o', required=True, help='TSV output file')
    args = parser.parse_args()

    # newline='' hands line-ending control to the csv module, as its
    # documentation requires; otherwise some platforms emit blank rows.
    with open(args.i, 'r') as infile, open(args.o, 'w', newline='') as outfile:
        output = csv.writer(outfile, delimiter='\t')
        output.writerow(['paper_eid', 'subject', 'subject_code'])
        for line in infile:
            # A blank line (e.g. trailing newline at end of file) is not a
            # JSON document; skip it rather than abort the whole run.
            if not line.strip():
                continue
            output.writerows(get_entries(line))
|
|
|
|
|
|
def get_entries(l):
    """Build the output rows for one line of the abstracts file.

    Args:
        l: A JSON document (as text) whose top-level
            ``'abstracts-retrieval-response'`` object contains
            ``['coredata']['eid']``.

    Returns:
        A list of ``[eid, subject, subject_code]`` rows, one per pair
        returned by ``get_subjects``; empty when there are no subjects.

    Raises:
        KeyError: If ``'abstracts-retrieval-response'``, ``'coredata'`` or
            ``'eid'`` is missing.
        json.JSONDecodeError: If ``l`` is not valid JSON.
    """
    full = json.loads(l)['abstracts-retrieval-response']
    eid = full['coredata']['eid']
    # Prepend the eid to every (subject, code) pair
    return [[eid, s[0], s[1]] for s in get_subjects(full)]
|
|
|
|
|
|
def get_subjects(abstract_response):
    """Extract the subject areas from one abstract response.

    Args:
        abstract_response: Mapping that holds the subject areas under
            ``['subject-areas']['subject-area']``, either as a single entry
            or as a list of entries.

    Returns:
        A list of ``[name, code]`` pairs, read from each entry's ``'$'`` and
        ``'@code'`` keys.

    Raises:
        KeyError: If the subject-area path is missing (the response is
            printed first so the offending record can be identified), or if
            an entry lacks ``'$'`` or ``'@code'``.
    """
    try:
        subject_area = abstract_response['subject-areas']['subject-area']
    except KeyError:
        # Print the response itself: `result` is not bound at this point, so
        # referencing it here would replace the KeyError with an
        # UnboundLocalError.
        print(abstract_response)
        raise
    # Get the subject name and code of every subject area
    return [[s['$'], s['@code']] for s in make_list(subject_area)]
|
|
|
|
|
|
def make_list(list_or_dict):
    """Return the argument as a list.

    A list is handed back unchanged (the very same object); any other value
    is wrapped in a new one-element list.
    """
    if not isinstance(list_or_dict, list):
        return [list_or_dict]
    return list_or_dict
|
|
|
|
# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|