1
0
mw-lifecycle-analysis/.ipynb_checkpoints/BERT-hw-checkpoint.ipynb
2025-03-01 17:08:16 -08:00

73 lines
2.1 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "2dd04d34-25c0-470f-973d-1325ce0df797",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The history saving thread hit an unexpected error (OperationalError('disk I/O error')).History will not be written to the database.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/gscratch/scrubbed/mjilg/jupyter-notebook/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from transformers import AutoTokenizer, AutoModelForSequenceClassification\n",
"from transformers import Trainer, TrainingArguments\n",
"from torch.utils.data import Dataset\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix\n",
"import torch\n",
"import json"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3504b633-4999-47d0-a6eb-ce7916206ced",
"metadata": {},
"outputs": [],
"source": [
"model_name = \"distilbert-base-uncased\"\n",
"model = AutoModelForSequenceClassification.from_pretrained(model_name, \n",
" num_labels=1) \n",
"tokenizer = AutoTokenizer.from_pretrained(model_name)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.21"
}
},
"nbformat": 4,
"nbformat_minor": 5
}