poking at pre processing
This commit is contained in:
		
							parent
							
								
									7aa3af05ea
								
							
						
					
					
						commit
						a1ae286073
					
				
							
								
								
									
										49
									
								
								text_pp.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										49
									
								
								text_pp.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,49 @@ | ||||
| import os  | ||||
| import csv  | ||||
| import json  | ||||
| 
 | ||||
# Directory whose entries check_files_for_content() opens and parses as JSON.
# The trailing slash matters: file names are appended with plain `+`.
path = '/data/users/mgaughan/kkex_comment_data_120523/'

# Shape of a parsed file that contains no issue edges at all; a file whose
# contents compare equal to this is treated as unusable and removed.
empty_file_dict = {
    'data': {
        'repository': {
            'issues': {
                'edges': [],
            },
        },
    },
}
| 
 | ||||
| 
 | ||||
| 
 | ||||
def check_files_for_content(filelist):
    """Prune unusable data files from ``path`` and walk the usable ones.

    Each name in *filelist* is appended to the module-level ``path``, opened,
    and parsed as JSON. A file counts as bad data when its top-level object
    has an ``'errors'`` key or compares equal to ``empty_file_dict``; bad
    files are deleted from disk. For every other file the issue edges found
    under ``data -> repository -> issues -> edges`` are passed to
    ``handle_repo_issues``.

    The loop is deliberately capped while this script is exploratory: a
    usable file that is the very first file examined has the keys of its
    first issue node printed and the loop goes on; any later usable file is
    handled and then ends the loop.

    Args:
        filelist: iterable of file names located directly under ``path``.

    Returns:
        None. The number of deleted files is printed when the loop ends.

    Raises:
        Nothing is caught here: errors from ``open``, ``json.load`` and
        ``os.remove`` propagate, as do ``KeyError``/``IndexError`` raised
        while indexing into the parsed contents.
    """
    files_seen = 0
    bad_data_files = 0
    for filename in filelist:
        filepath = path + filename
        # The context manager closes the handle on every path (usable file,
        # bad file, or a parse error) and guarantees it is closed *before*
        # the file may be deleted below.
        with open(filepath) as opened_file:
            file_contents = json.load(opened_file)
        bad_comment_data = (
            'errors' in file_contents.keys()
            or file_contents == empty_file_dict
        )
        files_seen += 1
        if bad_comment_data:
            bad_data_files += 1
            os.remove(filepath)
            continue
        list_of_issues = file_contents['data']['repository']['issues']['edges']
        handle_repo_issues(list_of_issues)
        if files_seen < 2:
            # Only reachable for the first file examined: show which fields
            # an issue node carries.
            print(list_of_issues[0]['node'].keys())
        else:
            # Exploration cap: stop after this file instead of walking the
            # whole directory.
            break
    print(bad_data_files)
|              | ||||
def handle_repo_issues(list_of_issues):
    """Print each issue author's URL, then process that issue's comments.

    Args:
        list_of_issues: iterable of issue edges; each edge is indexed as
            ``edge['node']['author']['url']`` and
            ``edge['node']['comments']['edges']``.

    Returns:
        None.
    """
    for issue_edge in list_of_issues:
        issue_node = issue_edge['node']
        # NOTE(review): this subscript raises TypeError if 'author' is None;
        # confirm whether the collected data can contain null authors.
        print(issue_node['author']['url'])
        handle_issue_comments(issue_node['comments']['edges'])
| 
 | ||||
def handle_issue_comments(list_of_comments):
    """Print the author and then the body of every comment edge.

    Args:
        list_of_comments: iterable of comment edges; each edge is indexed as
            ``edge['node']['body']`` and ``edge['node']['author']``.

    Returns:
        None.
    """
    for comment_edge in list_of_comments:
        comment_node = comment_edge['node']
        # Look up the body before the author so a missing key fails in the
        # same order as before, and before anything is printed.
        body, author = comment_node['body'], comment_node['author']
        print(author, body, sep='\n')
| 
 | ||||
| 
 | ||||
if __name__ == "__main__":
    # Script entry point: check every entry found in the data directory.
    data_dir_entries = os.listdir(path)
    check_files_for_content(data_dir_entries)
		Loading…
	
		Reference in New Issue
	
	Block a user