73 lines
		
	
	
		
			2.1 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			73 lines
		
	
	
		
			2.1 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| {
 | |
|  "cells": [
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": null,
 | |
|    "id": "2dd04d34-25c0-470f-973d-1325ce0df797",
 | |
|    "metadata": {},
 | |
|    "outputs": [
 | |
|     {
 | |
|      "name": "stdout",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "The history saving thread hit an unexpected error (OperationalError('disk I/O error')).History will not be written to the database.\n"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "/gscratch/scrubbed/mjilg/jupyter-notebook/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
 | |
|       "  from .autonotebook import tqdm as notebook_tqdm\n"
 | |
|      ]
 | |
|     }
 | |
|    ],
 | |
|    "source": [
 | |
|     "import pandas as pd\n",
 | |
|     "import numpy as np\n",
 | |
|     "from transformers import AutoTokenizer, AutoModelForSequenceClassification\n",
 | |
|     "from transformers import Trainer, TrainingArguments\n",
 | |
|     "from torch.utils.data import Dataset\n",
 | |
|     "from sklearn.model_selection import train_test_split\n",
 | |
|     "from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix\n",
 | |
|     "import torch\n",
 | |
|     "import json"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": null,
 | |
|    "id": "3504b633-4999-47d0-a6eb-ce7916206ced",
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "# Checkpoint identifier passed to both from_pretrained() calls in this cell.\n",
 | |
|     "model_name = \"distilbert-base-uncased\"\n",
 | |
|     "\n",
 | |
|     "# Sequence-classification model built from the checkpoint with one output label.\n",
 | |
|     "# NOTE(review): per the transformers docs, num_labels=1 appears to default to a\n",
 | |
|     "# regression objective when problem_type is unset - confirm this is intended,\n",
 | |
|     "# given the classification metrics imported in the first cell.\n",
 | |
|     "model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=1)\n",
 | |
|     "\n",
 | |
|     "# Tokenizer loaded from the same checkpoint identifier as the model.\n",
 | |
|     "tokenizer = AutoTokenizer.from_pretrained(model_name)"
 | |
|    ]
 | |
|   }
 | |
|  ],
 | |
|  "metadata": {
 | |
|   "kernelspec": {
 | |
|    "display_name": "Python 3 (ipykernel)",
 | |
|    "language": "python",
 | |
|    "name": "python3"
 | |
|   },
 | |
|   "language_info": {
 | |
|    "codemirror_mode": {
 | |
|     "name": "ipython",
 | |
|     "version": 3
 | |
|    },
 | |
|    "file_extension": ".py",
 | |
|    "mimetype": "text/x-python",
 | |
|    "name": "python",
 | |
|    "nbconvert_exporter": "python",
 | |
|    "pygments_lexer": "ipython3",
 | |
|    "version": "3.9.21"
 | |
|   }
 | |
|  },
 | |
|  "nbformat": 4,
 | |
|  "nbformat_minor": 5
 | |
| }
 |