poking at pre processing
This commit is contained in:
		
							parent
							
								
									7aa3af05ea
								
							
						
					
					
						commit
						a1ae286073
					
				
							
								
								
									
										49
									
								
								text_pp.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										49
									
								
								text_pp.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,49 @@ | |||||||
|  | import os  | ||||||
|  | import csv  | ||||||
|  | import json  | ||||||
|  | 
 | ||||||
# Directory that holds the comment-data JSON files. It ends with a slash so
# that a bare file name can be appended to it.
path = '/data/users/mgaughan/kkex_comment_data_120523/'

# Parsed content of a data file whose issue list is empty; a file equal to
# this is treated as unusable by check_files_for_content.
empty_file_dict = {
    'data': {
        'repository': {
            'issues': {
                'edges': [],
            },
        },
    },
}
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
# Pruning directory of bad data files/things that cannot be used.
def check_files_for_content(filelist):
    """Delete unusable data files and hand usable ones to the issue handler.

    Each name in ``filelist`` is resolved against the module-level ``path``
    and parsed as JSON. A file counts as bad, and is removed from disk, when
    its top level has an ``'errors'`` key or when its content equals
    ``empty_file_dict``. For every other file the list found under
    ``data -> repository -> issues -> edges`` is passed to
    ``handle_repo_issues``.

    The loop is deliberately short-lived: a good file seen as the very first
    entry also has the keys of its first issue node printed, and the loop
    stops after handling the first good file found at position two or later.
    Bad files never stop the loop.

    Args:
        filelist: Iterable of file names located directly inside ``path``.

    Returns:
        None. The number of deleted files is printed at the end.

    Raises:
        OSError: If a file cannot be opened or removed.
        json.JSONDecodeError: If a file does not contain valid JSON.
        KeyError: If a good file lacks the expected nested keys.
    """
    files_seen = 0
    bad_data_files = 0
    for file in filelist:
        filepath = os.path.join(path, file)
        # Close the handle as soon as parsing is done, so that it is never
        # leaked and the file is no longer open when it gets removed.
        with open(filepath) as opened_file:
            file_contents = json.load(opened_file)
        files_seen += 1
        if 'errors' in file_contents or file_contents == empty_file_dict:
            bad_data_files += 1
            os.remove(filepath)
            continue
        list_of_issues = file_contents['data']['repository']['issues']['edges']
        handle_repo_issues(list_of_issues)
        if files_seen >= 2:
            break
        # Preview the fields available on an issue; skipped when the file
        # has no issues, since there is no first element to inspect.
        if list_of_issues:
            print(list_of_issues[0]['node'].keys())
    print(bad_data_files)
|  |              | ||||||
def handle_repo_issues(list_of_issues):
    """Print each issue author's URL and process that issue's comments.

    Args:
        list_of_issues: Iterable of issue edges. Each edge is a mapping with
            a ``'node'`` entry that holds ``'author'`` and ``'comments'``;
            the comment edges are forwarded to ``handle_issue_comments``.

    Returns:
        None.

    Raises:
        KeyError: If an edge lacks ``'node'``, ``'author'`` or the
            ``'comments' -> 'edges'`` entries.
    """
    for issue in list_of_issues:
        node = issue['node']
        author = node['author']
        # NOTE(review): the data looks like GitHub GraphQL output, where the
        # author is null for accounts that no longer exist. Print None for
        # those instead of failing on the subscript.
        print(author['url'] if author is not None else None)
        handle_issue_comments(node['comments']['edges'])
|  | 
 | ||||||
def handle_issue_comments(list_of_comments):
    """Print the author and then the body of every comment edge.

    Args:
        list_of_comments: Iterable of comment edges; each edge is a mapping
            whose ``'node'`` entry holds ``'body'`` and ``'author'``.

    Returns:
        None.
    """
    for edge in list_of_comments:
        node = edge['node']
        # Look both fields up before printing, so that a missing key raises
        # before anything is written for this comment.
        body, author = node['body'], node['author']
        print(author)
        print(body)
|  | 
 | ||||||
|  | 
 | ||||||
if __name__ == "__main__":
    # Run the pruning pass over every entry in the data directory.
    data_file_names = os.listdir(path)
    check_files_for_content(data_file_names)
		Loading…
	
		Reference in New Issue
	
	Block a user