initial import of material for public archive into git
We're creating a fresh archive because the history for our old chapter includes API keys, data files, and other material we can't share.
This commit is contained in:
50
code/data_processing/04_make_paper_subject_table.py
Normal file
50
code/data_processing/04_make_paper_subject_table.py
Normal file
@@ -0,0 +1,50 @@
|
||||
import argparse
import csv
import json
import sys
|
||||
|
||||
def main():
    """Write a paper-to-subject TSV from a file of abstract records.

    Command-line arguments:
        -i: Path of the abstracts file; each non-blank line is parsed as
            one JSON document by ``get_entries``.
        -o: Path of the TSV file to write. A header row
            (``paper_eid``, ``subject``, ``subject_code``) is written
            first, followed by every row ``get_entries`` returns for
            each input line.
    """
    parser = argparse.ArgumentParser(description='Generate paper to subject mapping file from abstracts file')
    # Both paths are mandatory; without them open() would fail on None.
    parser.add_argument('-i', required=True, help='Abstract file')
    parser.add_argument('-o', required=True, help='TSV output file')
    args = parser.parse_args()

    # The input is opened first so a missing abstracts file does not leave
    # an empty output file behind. newline='' hands line-ending control to
    # the csv module, as its documentation requires for writers.
    with open(args.i, 'r', encoding='utf-8') as in_file, \
            open(args.o, 'w', encoding='utf-8', newline='') as out_file:
        output = csv.writer(out_file, delimiter='\t')
        output.writerow(['paper_eid', 'subject', 'subject_code'])
        for line in in_file:
            # A blank line is not a JSON document; skip it instead of
            # letting json.loads abort the whole run.
            if not line.strip():
                continue
            output.writerows(get_entries(line))
|
||||
|
||||
|
||||
def get_entries(l):
    """Build the output rows for one line of the abstracts file.

    Args:
        l: A string holding one JSON document whose top-level
            ``'abstracts-retrieval-response'`` object contains
            ``coredata.eid`` and the subject data read by ``get_subjects``.

    Returns:
        A list of ``[eid, subject, subject_code]`` lists, one per subject
        returned by ``get_subjects`` (empty when there are no subjects).

    Raises:
        ValueError: If ``l`` is not valid JSON (raised by ``json.loads``).
        KeyError: If an expected key is missing from the document.
    """
    full = json.loads(l)['abstracts-retrieval-response']
    eid = full['coredata']['eid']
    # Prepend the eid to each [subject, subject_code] pair.
    return [[eid, s[0], s[1]] for s in get_subjects(full)]
|
||||
|
||||
|
||||
def get_subjects(abstract_response):
    """Extract the subject name/code pairs from one abstract response.

    Args:
        abstract_response: Mapping that holds the subject data under
            ``['subject-areas']['subject-area']``, either as a single
            entry or as a list of entries. Each entry is indexed by
            ``'$'`` (subject name) and ``'@code'`` (subject code).

    Returns:
        A list of ``[subject, subject_code]`` lists, one per entry.

    Raises:
        KeyError: If the subject-area keys are missing. The offending
            response is written to stderr before the error propagates.
    """
    try:
        subject_area = abstract_response['subject-areas']['subject-area']
    except KeyError:
        # Dump the record that lacks subject data so it can be identified,
        # then let the original KeyError propagate unchanged.
        print(abstract_response, file=sys.stderr)
        raise
    # A lone subject arrives as a bare entry rather than a list; normalise it.
    return [[s['$'], s['@code']] for s in make_list(subject_area)]
|
||||
|
||||
|
||||
def make_list(list_or_dict):
    """Return ``list_or_dict`` as a list.

    A value that is already a list is returned as-is (the same object);
    any other value is wrapped in a new one-element list.
    """
    if isinstance(list_or_dict, list):
        return list_or_dict
    return [list_or_dict]
|
||||
|
||||
if __name__ == '__main__':
    # Run the conversion only when executed as a script, not on import.
    main()
|
||||
Reference in New Issue
Block a user