poking at pre processing
This commit is contained in:
parent
7aa3af05ea
commit
a1ae286073
49
text_pp.py
Normal file
49
text_pp.py
Normal file
@ -0,0 +1,49 @@
|
||||
import os
|
||||
import csv
|
||||
import json
|
||||
|
||||
# Directory holding the comment-data JSON files. File names are joined to it
# by plain string concatenation, so the trailing slash is required.
path = '/data/users/mgaughan/kkex_comment_data_120523/'

# A loaded file whose contents equal this structure (no issue edges at all)
# is treated as bad data by check_files_for_content.
empty_file_dict = {
    'data': {
        'repository': {
            'issues': {
                'edges': [],
            },
        },
    },
}
|
||||
|
||||
|
||||
|
||||
#pruning directory of bad data files/things that cannot be used
|
||||
def check_files_for_content(filelist):
    """Delete unusable data files and walk the issues of the usable ones.

    Each name in ``filelist`` is appended to the module-level ``path`` and
    loaded as JSON. A file counts as bad, and is removed from disk, when its
    top level contains an ``'errors'`` key or when it equals
    ``empty_file_dict``. For every other file the issue edges are passed to
    ``handle_repo_issues``.

    The loop is deliberately short-circuited: every file examined (bad ones
    included) bumps a counter, and the loop ends right after handling the
    first usable file that is the second-or-later file examined. The number
    of deleted files is printed before returning.

    Args:
        filelist: File names relative to ``path``.
    """
    files_seen = 0
    bad_data_files = 0
    for file_name in filelist:
        filepath = path + file_name
        # The context manager closes the handle on every path (usable file,
        # bad file, early break, or a JSON parse error) and guarantees it is
        # closed before the file is deleted below.
        with open(filepath) as opened_file:
            file_contents = json.load(opened_file)
        files_seen += 1
        if 'errors' in file_contents or file_contents == empty_file_dict:
            bad_data_files += 1
            os.remove(filepath)
            continue
        list_of_issues = file_contents['data']['repository']['issues']['edges']
        handle_repo_issues(list_of_issues)
        if files_seen >= 2:
            break
        # Debug peek at the issue schema; skipped when there are no edges so
        # an empty list cannot raise IndexError.
        if list_of_issues:
            print(list_of_issues[0]['node'].keys())
    print(bad_data_files)
|
||||
|
||||
def handle_repo_issues(list_of_issues):
    """Print each issue's author URL and process that issue's comments.

    Args:
        list_of_issues: Iterable of issue edges, each a mapping with a
            ``'node'`` entry that holds ``'author'`` and ``'comments'``.
    """
    for issue_edge in list_of_issues:
        issue_node = issue_edge['node']
        # NOTE(review): assumes every issue has a non-null author mapping
        # with a 'url' key -- confirm against the collected data.
        author_url = issue_node['author']['url']
        print(author_url)
        handle_issue_comments(issue_node['comments']['edges'])
|
||||
|
||||
def handle_issue_comments(list_of_comments):
    """Print the author and then the body of every comment edge.

    Args:
        list_of_comments: Iterable of comment edges, each a mapping with a
            ``'node'`` entry that holds ``'body'`` and ``'author'``.
    """
    for comment_edge in list_of_comments:
        node = comment_edge['node']
        # Body is looked up before author, although author is printed first.
        body, author = node['body'], node['author']
        print(author)
        print(body)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: examine every entry found in the data directory.
    data_file_names = os.listdir(path)
    check_files_for_content(data_file_names)
|
Loading…
Reference in New Issue
Block a user