1
0

updating organization and adding some files for quest slurm, etc.

This commit is contained in:
mgaughan 2025-05-20 14:39:33 -05:00
parent 529571abb1
commit 24b75e9964
23 changed files with 55 additions and 0 deletions

View File

@ -0,0 +1,8 @@
from transformers import AutoModelForCausalLM, AutoTokenizer, OlmoForCausalLM
import torch
# Load the OLMo 2 1B instruct model and its tokenizer.
# Both are resolved from one checkpoint id so the tokenizer's vocabulary always
# matches the model weights; from_pretrained fetches from the Hugging Face Hub
# unless the files are already in the local cache.
MODEL_ID = "allenai/OLMo-2-0425-1B-Instruct"
olmo = AutoModelForCausalLM.from_pretrained(MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

12
models/p2-first-ir.py Normal file
View File

@ -0,0 +1,12 @@
from transformers import AutoModelForCausalLM, AutoTokenizer, OlmoForCausalLM
import torch
# Load the OLMo 2 1B instruct model and its tokenizer.
# Both are resolved from one checkpoint id so the tokenizer's vocabulary always
# matches the model weights; from_pretrained fetches from the Hugging Face Hub
# unless the files are already in the local cache.
MODEL_ID = "allenai/OLMo-2-0425-1B-Instruct"
olmo = AutoModelForCausalLM.from_pretrained(MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Remaining steps of this script are not implemented yet:
# TODO: build the prompt
# TODO: hand the model the data
# TODO: collect the response

29
scripts/quest_srun.sh Normal file
View File

@ -0,0 +1,29 @@
#!/bin/bash
# Slurm batch job: OCR every PDF under ./studies_pdf into the ./studies_json
# workspace with the olmocr pipeline.
#
# Submit with `sbatch` from the directory that contains studies_pdf/ -- the
# relative paths below resolve against the submission directory.
#
# Account and partition to charge/run under.
#SBATCH -A p32852
#SBATCH -p gengpu
# One A100 on one node. The pipeline below is started once, without srun, so it
# only ever runs on the first allocated node; asking for more nodes would just
# hold idle GPUs for the length of the job.
#SBATCH --gres=gpu:a100:1
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --time=24:00:00
#SBATCH --mem=64G
#SBATCH --cpus-per-task=4
#SBATCH --job-name=SLR_OCR
# No --error is given, so stderr lands in this file together with stdout.
#SBATCH --output=slr_ocr_logs.log
#SBATCH --mail-type=BEGIN,END,FAIL
#SBATCH --mail-user=gaughan@u.northwestern.edu
echo "setting up the environment"
module purge
eval "$(conda shell.bash hook)"
conda activate olmocr

# olmocr refuses to start without poppler's pdftoppm. Check for it up front so
# the failure is reported as a failed job with an actionable message.
if ! command -v pdftoppm >/dev/null 2>&1; then
    echo "ERROR: pdftoppm not found on PATH; install poppler in the olmocr environment (e.g. conda install -c conda-forge poppler)" >&2
    exit 1
fi

echo "running the pdf to json ocr conversion"
python -m olmocr.pipeline ./studies_json --pdfs ./studies_pdf/*.pdf
# Remember the pipeline's result: the cleanup commands below would otherwise
# overwrite it and the job would always be recorded as successful.
status=$?

echo "job finished, cleaning up"
conda deactivate
echo "job pau at: $(date)"
# Hand the pipeline's exit code to Slurm so job state and FAIL mail are accurate.
exit "$status"

6
slr_ocr_logs.log Normal file
View File

@ -0,0 +1,6 @@
setting up the environment
running the pdf to json ocr conversion
ERROR:olmocr.check:pdftoppm is not installed.
ERROR:olmocr.check:Check the README in the https://github.com/allenai/olmocr/blob/main/README.md for installation instructions
job finished, cleaning up
job pau at: Tue May 20 14:29:36 CDT 2025