1
0

initial import of material for public archive into git

We're creating a fresh archive because the history for our old chapter includes
API keys, data files, and other material we can't share.
This commit is contained in:
2018-01-21 17:15:51 -08:00
commit dd420c77de
41 changed files with 7069 additions and 0 deletions

View File

@@ -0,0 +1,50 @@
import json
import argparse
import csv
def main():
    """Command-line entry point: write a paper-to-subject TSV file.

    Reads the abstracts file given with ``-i`` line by line, passes each
    non-blank line to ``get_entries``, and writes the rows it returns to the
    tab-separated file given with ``-o``, after a header row of
    ``paper_eid``, ``subject`` and ``subject_code``.
    """
    parser = argparse.ArgumentParser(description='Generate paper to subject mapping file from abstracts file')
    # Both paths are mandatory; without them open() would fail with an
    # unhelpful TypeError on None instead of an argparse usage message.
    parser.add_argument('-i', required=True, help='Abstract file')
    parser.add_argument('-o', required=True, help='TSV output file')
    args = parser.parse_args()
    # newline='' lets the csv module control line endings itself, which the
    # csv documentation asks for on any file object handed to csv.writer.
    with open(args.i, 'r') as in_file, open(args.o, 'w', newline='') as out_file:
        output = csv.writer(out_file, delimiter='\t')
        output.writerow(['paper_eid', 'subject', 'subject_code'])
        for line in in_file:
            # Skip blank lines (e.g. a trailing empty line) rather than
            # letting json.loads fail on them.
            if not line.strip():
                continue
            output.writerows(get_entries(line))
def get_entries(l):
    """Return the output rows for one line of the abstracts file.

    Args:
        l: A JSON string whose top-level object has an
            ``abstracts-retrieval-response`` entry containing
            ``coredata.eid``.

    Returns:
        A list of ``[eid, s[0], s[1]]`` lists, one for each pair returned
        by ``get_subjects`` for this response.

    Raises:
        ValueError: If ``l`` is not valid JSON (raised by ``json.loads``).
        KeyError: If an expected key is missing from the parsed JSON.
    """
    json_response = json.loads(l)
    full = json_response['abstracts-retrieval-response']
    eid = full['coredata']['eid']
    # Prepend the eid to each subject pair and return the rows.
    return [[eid, s[0], s[1]] for s in get_subjects(full)]
def get_subjects(abstract_response):
    """Return the subject name and code for each subject in a response.

    Args:
        abstract_response: Mapping with a ``subject-areas`` entry whose
            ``subject-area`` value is either a single subject dict or a list
            of them. Each subject dict is read via its ``$`` and ``@code``
            keys.

    Returns:
        A list of ``[s['$'], s['@code']]`` pairs, one per subject.

    Raises:
        KeyError: If ``subject-areas`` or ``subject-area`` is missing (the
            offending response is printed first), or if a subject lacks
            ``$`` or ``@code``.
    """
    try:
        raw_subjects = abstract_response['subject-areas']['subject-area']
    except KeyError:
        # Show the record that lacks subject data before propagating the
        # error. Printing ``result`` here, as the code did before, raised
        # UnboundLocalError and hid the real KeyError.
        print(abstract_response)
        raise
    # Get the subject name and code for every subject.
    return [[s['$'], s['@code']] for s in make_list(raw_subjects)]
def make_list(list_or_dict):
    """Return the argument unchanged if it is a list, else wrap it in a list."""
    if isinstance(list_or_dict, list):
        return list_or_dict
    # A lone item becomes a one-element list so callers can always iterate.
    return [list_or_dict]
# Run the command-line tool only when executed as a script, not on import.
if __name__ == '__main__':
    main()