import os
import csv
import json

path = '/data/users/mgaughan/kkex_comment_data_120523/'
# Exact payload of a response that contains no issues; such files hold nothing usable.
empty_file_dict = {'data': {'repository': {'issues': {'edges': []}}}}


def check_files_for_content(filelist, *, max_files=2):
    """Prune unusable data files from ``path`` and print the usable ones.

    Each name in *filelist* is resolved against the module-level ``path`` and
    parsed as JSON. A file is treated as unusable, and is deleted from disk,
    when its top level contains an ``'errors'`` key or when it equals
    ``empty_file_dict``. Usable files have their issues passed to
    ``handle_repo_issues``.

    Args:
        filelist: Iterable of file names relative to ``path``.
        max_files: 1-based position in *filelist* at or after which the loop
            stops once a usable file has been handled. Unusable files never
            trigger the stop (they are deleted and skipped). The default of 2
            reproduces the original hard-coded limit; pass ``None`` to walk
            the entire list.

    Side effects:
        Deletes unusable files, prints issue/comment data, prints the key set
        of the first issue of each usable file that does not trigger the stop,
        and finally prints the number of deleted files.
    """
    bad_data_files = 0
    for position, file_name in enumerate(filelist, start=1):
        filepath = os.path.join(path, file_name)
        # Close the handle before any os.remove so no descriptor is leaked and
        # the delete also works on platforms that refuse to remove open files.
        with open(filepath, encoding="utf-8") as opened_file:
            file_contents = json.load(opened_file)

        if 'errors' in file_contents or file_contents == empty_file_dict:
            bad_data_files += 1
            os.remove(filepath)
            continue

        list_of_issues = file_contents['data']['repository']['issues']['edges']
        handle_repo_issues(list_of_issues)

        if max_files is not None and position >= max_files:
            break
        # Peek at the issue schema; guard against an empty edge list that was
        # not caught above (e.g. the payload carries extra keys).
        if list_of_issues:
            print(list_of_issues[0]['node'].keys())
    print(bad_data_files)


def handle_repo_issues(list_of_issues):
    """Print each issue's author URL, then print all of its comments.

    Args:
        list_of_issues: List of issue edges, each a dict with a ``'node'``
            holding ``'author'`` and ``'comments'`` -> ``'edges'``.
    """
    for issue in list_of_issues:
        node = issue['node']
        author = node['author']
        # NOTE(review): author is presumably null for deleted accounts in the
        # GitHub GraphQL response - confirm; print None rather than crash.
        print(author['url'] if author else None)
        handle_issue_comments(node['comments']['edges'])


def handle_issue_comments(list_of_comments):
    """Print the author followed by the body of every comment.

    Args:
        list_of_comments: List of comment edges, each a dict with a ``'node'``
            holding ``'author'`` and ``'body'``.
    """
    for comment in list_of_comments:
        node = comment['node']
        print(node['author'])
        print(node['body'])


if __name__ == "__main__":
    check_files_for_content(os.listdir(path))