updated the labels to try to store in a better format
This commit is contained in:
		
							parent
							
								
									7e8fb1982b
								
							
						
					
					
						commit
						43fb346318
					
				
							
								
								
									
										203736
									
								
								p2/quest/071525_neurobiber_labels.csv
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										203736
									
								
								p2/quest/071525_neurobiber_labels.csv
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
							
								
								
									
										6
									
								
								p2/quest/neurobiber-categorization.log
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										6
									
								
								p2/quest/neurobiber-categorization.log
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,6 @@ | |||||||
|  | starting the job at: Tue Jul 15 14:09:10 CDT 2025 | ||||||
|  | setting up the environment | ||||||
|  | running the neurobiber labeling script | ||||||
|  | neurobiber labeling pau | ||||||
|  | job finished, cleaning up | ||||||
|  | job pau at: Tue Jul 15 14:12:26 CDT 2025 | ||||||
| @ -110,9 +110,13 @@ if __name__ == "__main__": | |||||||
|     model, tokenizer = load_model_and_tokenizer() |     model, tokenizer = load_model_and_tokenizer() | ||||||
|     preds = predict_batch(model, tokenizer, docs) |     preds = predict_batch(model, tokenizer, docs) | ||||||
|     #new columns in the df for the predicted neurobiber items  |     #new columns in the df for the predicted neurobiber items  | ||||||
|     preds_cols = [f"neurobiber_{i+1}" for i in range(96)] |     #preds_cols = [f"neurobiber_{i+1}" for i in range(96)] | ||||||
|     preds_df = pd.DataFrame(preds, columns=preds_cols, index=first_discussion_df.index) |     #preds_df = pd.DataFrame(preds, columns=preds_cols, index=first_discussion_df.index) | ||||||
|     final_discussion_df = pd.concat([first_discussion_df, preds_df], axis=1) |     #final_discussion_df = pd.concat([first_discussion_df, preds_df], axis=1) | ||||||
|  |      | ||||||
|  |     #assigning the predictions as a new column  | ||||||
|  |     final_discussion_df = first_discussion_df.copy() | ||||||
|  |     final_discussion_df["neurobiber_preds"] = list(preds) | ||||||
|     #assert that order has been preserved  |     #assert that order has been preserved  | ||||||
|     for _ in range(10): |     for _ in range(10): | ||||||
|         random_index = random.choice(first_discussion_df.index) |         random_index = random.choice(first_discussion_df.index) | ||||||
| @ -120,7 +124,7 @@ if __name__ == "__main__": | |||||||
|     #assert that there are the same number of rows in first_discussion_df and second_discussion_df |     #assert that there are the same number of rows in first_discussion_df and second_discussion_df | ||||||
|     assert len(first_discussion_df) == len(final_discussion_df) |     assert len(first_discussion_df) == len(final_discussion_df) | ||||||
|     # if passing the prior asserts, let's write to a csv |     # if passing the prior asserts, let's write to a csv | ||||||
|     final_discussion_df.to_csv("/home/nws8519/git/mw-lifecycle-analysis/p2/quest/071425_neurobiber_labels.csv", index=False) |     final_discussion_df.to_csv("/home/nws8519/git/mw-lifecycle-analysis/p2/quest/071525_neurobiber_labels.csv", index=False) | ||||||
|     print('neurobiber labeling pau') |     print('neurobiber labeling pau') | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user