{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2dd04d34-25c0-470f-973d-1325ce0df797",
   "metadata": {},
   "outputs": [],
   "source": [
    "# All imports live in this single cell so a fresh kernel can Run All.\n",
    "# Standard library\n",
    "import json\n",
    "\n",
    "# Third-party: numerics, metrics, and the PyTorch / Hugging Face stack\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import torch\n",
    "from sklearn.metrics import (\n",
    "    accuracy_score,\n",
    "    confusion_matrix,\n",
    "    f1_score,\n",
    "    precision_score,\n",
    "    recall_score,\n",
    ")\n",
    "from sklearn.model_selection import train_test_split\n",
    "from torch.utils.data import Dataset\n",
    "from transformers import (\n",
    "    AutoModelForSequenceClassification,\n",
    "    AutoTokenizer,\n",
    "    Trainer,\n",
    "    TrainingArguments,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3504b633-4999-47d0-a6eb-ce7916206ced",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Checkpoint name passed to both from_pretrained calls below, so the\n",
    "# model weights and the tokenizer are loaded from the same checkpoint.\n",
    "model_name = \"distilbert-base-uncased\"\n",
    "\n",
    "# NOTE(review): num_labels=1 requests a single-output head. Per the\n",
    "# transformers docs this is handled as regression (MSE loss) when labels\n",
    "# are supplied. The classification metrics imported in the first cell\n",
    "# suggest num_labels=2 (or more) may be intended -- confirm before training.\n",
    "model = AutoModelForSequenceClassification.from_pretrained(\n",
    "    model_name,\n",
    "    num_labels=1,\n",
    ")\n",
    "tokenizer = AutoTokenizer.from_pretrained(model_name)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.21"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}