initial import of material for public archive into git
We're creating a fresh archive because the history for our old chapter includes API keys, data files, and other material we can't share.
This commit is contained in:
62
code/data_processing/03_make_paper_aff_table.py
Normal file
62
code/data_processing/03_make_paper_aff_table.py
Normal file
@@ -0,0 +1,62 @@
|
||||
import json
|
||||
import argparse
|
||||
import csv
|
||||
|
||||
def main():
    """Convert a file of abstract records into a paper-to-affiliation TSV.

    Command-line arguments:
        -i  Path of the input file; each non-blank line is handed to
            ``get_entries`` as one JSON document.
        -o  Path of the tab-separated output file. It receives a header row
            (``paper_eid``, ``affiliation_id``, ``organization``,
            ``country``) followed by every row ``get_entries`` returns.
    """
    parser = argparse.ArgumentParser(description='Generate paper to affiliation mapping file from abstracts file')
    # Both paths are opened unconditionally below, so let argparse reject a
    # missing one with a usage message instead of failing inside open(None).
    parser.add_argument('-i', required=True, help='Abstract file')
    parser.add_argument('-o', required=True, help='TSV output file')
    args = parser.parse_args()

    # newline='' is what the csv module asks for on files handed to a writer;
    # without it, platforms that translate '\n' emit '\r\r\n' row endings.
    # The encoding is pinned so the result does not depend on the locale.
    with open(args.i, 'r', encoding='utf-8') as infile, \
            open(args.o, 'w', encoding='utf-8', newline='') as outfile:
        writer = csv.writer(outfile, delimiter='\t')
        writer.writerow(['paper_eid', 'affiliation_id',
                         'organization', 'country'])
        for line in infile:
            # A blank line (for example a trailing newline at end of file)
            # is not a JSON document; skip it rather than fail while parsing.
            if not line.strip():
                continue
            writer.writerows(get_entries(line))
|
||||
|
||||
|
||||
def get_entries(l):
    """Build the affiliation rows for a single abstract record.

    ``l`` is the text of one JSON document whose top-level
    ``'abstracts-retrieval-response'`` object holds the record. The result is
    a list of ``[eid, affiliation_id, organization, country]`` lists, one per
    item that ``get_aff_info`` reports for ``'country'``; it is empty when
    that lookup yields nothing.
    """
    record = json.loads(l)['abstracts-retrieval-response']
    bib_head = record['item']['bibrecord']['head']
    paper_eid = record['coredata']['eid']

    # Same field extracted three times, so the three lists share positions.
    countries = get_aff_info(bib_head, 'country')
    affiliation_ids = get_aff_info(bib_head, '@afid')
    org_names = get_aff_info(bib_head, 'organization')

    if not countries:
        return []

    rows = []
    for idx, country in enumerate(countries):
        rows.append([paper_eid, affiliation_ids[idx], org_names[idx], country])
    return rows
|
||||
|
||||
def get_aff_info(head, affiliation_key):
    """Collect one affiliation field from every author group in ``head``.

    Args:
        head: Mapping for a record's bibliographic head. Its
            ``'author-group'`` value may be a single group or a list of
            groups.
        affiliation_key: Key to read from each group's ``'affiliation'``
            mapping.

    Returns:
        ``None`` when ``head`` has no ``'author-group'`` key. Otherwise a
        list with exactly one item per non-``None`` author group: the value
        found, or ``''`` when the group has no usable value. Because every
        group contributes one item, lists built for different keys from the
        same ``head`` line up index by index.
    """
    try:
        groups = make_list(head['author-group'])
    except KeyError:
        return None

    aff_info = []
    for group in groups:
        if group is None:
            continue
        try:
            value = group['affiliation'][affiliation_key]
            if isinstance(value, list):
                # A multi-part value is a list of {'$': text} mappings; the
                # final part is the base organization. An empty list has no
                # final part, so report it as missing instead of raising
                # IndexError.
                value = value[-1]['$'] if value else ''
            elif isinstance(value, dict) and '$' in value:
                # NOTE(review): a lone part presumably arrives as a bare
                # {'$': text} mapping rather than a one-element list; unwrap
                # it so the caller gets text, not a dict. Confirm against
                # real records.
                value = value['$']
            aff_info.append(value)
        except KeyError:
            # No affiliation info for these authors: keep the position with
            # an empty string.
            aff_info.append('')
    return aff_info
|
||||
|
||||
def make_list(list_or_dict):
    """Return the argument unchanged if it is a list, else wrap it in one."""
    if isinstance(list_or_dict, list):
        return list_or_dict
    return [list_or_dict]
|
||||
|
||||
# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user