diff --git a/models/p1-classification.py b/models/p1-classification.py
new file mode 100644
index 0000000..ef8fc4b
--- /dev/null
+++ b/models/p1-classification.py
@@ -0,0 +1,12 @@
+"""Load the OLMo 2 1B Instruct model and tokenizer; classification logic is still a stub."""
+from transformers import AutoModelForCausalLM, AutoTokenizer, OlmoForCausalLM
+import torch
+
+# Hugging Face checkpoint id shared by the model and its tokenizer.
+MODEL_NAME = "allenai/OLMo-2-0425-1B-Instruct"
+
+#load in the different models
+olmo = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+
+# TODO: classification step not implemented yet
diff --git a/models/p2-first-ir.py b/models/p2-first-ir.py
new file mode 100644
index 0000000..ed32f3c
--- /dev/null
+++ b/models/p2-first-ir.py
@@ -0,0 +1,16 @@
+"""Load the OLMo 2 1B Instruct model and tokenizer; the prompting steps are still stubs."""
+from transformers import AutoModelForCausalLM, AutoTokenizer, OlmoForCausalLM
+import torch
+
+# Hugging Face checkpoint id shared by the model and its tokenizer.
+MODEL_NAME = "allenai/OLMo-2-0425-1B-Instruct"
+
+#load in the different models
+olmo = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+
+#prompt
+
+#hand the model the data
+
+#collect the response
diff --git a/scripts/quest_srun.sh b/scripts/quest_srun.sh
new file mode 100644
index 0000000..a5b6a9b
--- /dev/null
+++ b/scripts/quest_srun.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+#SBATCH -A p32852
+#SBATCH -p gengpu
+#SBATCH --gres=gpu:a100:1
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --time=24:00:00
+#SBATCH --mem=64G
+#SBATCH --cpus-per-task=4
+#SBATCH --job-name=SLR_OCR
+#SBATCH --output=slr_ocr_logs.log
+#SBATCH --mail-type=BEGIN,END,FAIL
+#SBATCH --mail-user=gaughan@u.northwestern.edu
+
+# Batch job: convert the PDFs in ./studies_pdfs to JSON under ./studies_json with olmocr.
+# One node is requested because the pipeline below is launched once, without srun.
+
+echo "setting up the environment"
+
+module purge
+eval "$(conda shell.bash hook)"
+conda activate olmocr
+
+echo "running the pdf to json ocr conversion"
+
+# olmocr needs pdftoppm (poppler) on PATH; it aborts with an error when it is missing.
+# Keep the exit status so cleanup still runs but a pipeline failure is not masked.
+status=0
+python -m olmocr.pipeline ./studies_json --pdfs ./studies_pdfs/*.pdf || status=$?
+
+echo "job finished, cleaning up"
+
+conda deactivate
+
+echo "job pau at: $(date)"
+exit "$status"
diff --git a/slr_ocr_logs.log b/slr_ocr_logs.log
new file mode 100644
index 0000000..c6bdec5
--- /dev/null
+++ b/slr_ocr_logs.log
@@ -0,0 +1,6 @@
+setting up the environment
+running the pdf to json ocr conversion
+ERROR:olmocr.check:pdftoppm is not installed.
+ERROR:olmocr.check:Check the README in the https://github.com/allenai/olmocr/blob/main/README.md for installation instructions
+job finished, cleaning up
+job pau at: Tue May 20 14:29:36 CDT 2025
diff --git a/studies/.DS_Store b/studies_pdfs/.DS_Store
similarity index 100%
rename from studies/.DS_Store
rename to studies_pdfs/.DS_Store
diff --git a/studies/001-adams.pdf b/studies_pdfs/001-adams.pdf
similarity index 100%
rename from studies/001-adams.pdf
rename to studies_pdfs/001-adams.pdf
diff --git a/studies/002-barcomb.pdf b/studies_pdfs/002-barcomb.pdf
similarity index 100%
rename from studies/002-barcomb.pdf
rename to studies_pdfs/002-barcomb.pdf
diff --git a/studies/003-bogart.pdf b/studies_pdfs/003-bogart.pdf
similarity index 100%
rename from studies/003-bogart.pdf
rename to studies_pdfs/003-bogart.pdf
diff --git a/studies/004-butler.pdf b/studies_pdfs/004-butler.pdf
similarity index 100%
rename from studies/004-butler.pdf
rename to studies_pdfs/004-butler.pdf
diff --git a/studies/005-crowston-shamshurin.pdf b/studies_pdfs/005-crowston-shamshurin.pdf
similarity index 100%
rename from studies/005-crowston-shamshurin.pdf
rename to studies_pdfs/005-crowston-shamshurin.pdf
diff --git a/studies/006-franke.pdf b/studies_pdfs/006-franke.pdf
similarity index 100%
rename from studies/006-franke.pdf
rename to studies_pdfs/006-franke.pdf
diff --git a/studies/007-gamalielsson.pdf b/studies_pdfs/007-gamalielsson.pdf
similarity index 100%
rename from studies/007-gamalielsson.pdf
rename to studies_pdfs/007-gamalielsson.pdf
diff --git a/studies/008-geiger.pdf b/studies_pdfs/008-geiger.pdf
similarity index 100%
rename from studies/008-geiger.pdf
rename to studies_pdfs/008-geiger.pdf
diff --git a/studies/009-hsieh.pdf b/studies_pdfs/009-hsieh.pdf
similarity index 100%
rename from studies/009-hsieh.pdf
rename to studies_pdfs/009-hsieh.pdf
diff --git a/studies/010-hu.pdf b/studies_pdfs/010-hu.pdf
similarity index 100%
rename from studies/010-hu.pdf
rename to studies_pdfs/010-hu.pdf
diff --git a/studies/011-jahanshahi.pdf b/studies_pdfs/011-jahanshahi.pdf
similarity index 100%
rename from studies/011-jahanshahi.pdf
rename to studies_pdfs/011-jahanshahi.pdf
diff --git a/studies/012-jensen-scacchi.pdf b/studies_pdfs/012-jensen-scacchi.pdf
similarity index 100%
rename from studies/012-jensen-scacchi.pdf
rename to studies_pdfs/012-jensen-scacchi.pdf
diff --git a/studies/013-klug.pdf b/studies_pdfs/013-klug.pdf
similarity index 100%
rename from studies/013-klug.pdf
rename to studies_pdfs/013-klug.pdf
diff --git a/studies/014-norskov.pdf b/studies_pdfs/014-norskov.pdf
similarity index 100%
rename from studies/014-norskov.pdf
rename to studies_pdfs/014-norskov.pdf
diff --git a/studies/015-santos.pdf b/studies_pdfs/015-santos.pdf
similarity index 100%
rename from studies/015-santos.pdf
rename to studies_pdfs/015-santos.pdf
diff --git a/studies/016-sojer-henkel.pdf b/studies_pdfs/016-sojer-henkel.pdf
similarity index 100%
rename from studies/016-sojer-henkel.pdf
rename to studies_pdfs/016-sojer-henkel.pdf
diff --git a/studies/017-wessel.pdf b/studies_pdfs/017-wessel.pdf
similarity index 100%
rename from studies/017-wessel.pdf
rename to studies_pdfs/017-wessel.pdf
diff --git a/studies/018-yin.pdf b/studies_pdfs/018-yin.pdf
similarity index 100%
rename from studies/018-yin.pdf
rename to studies_pdfs/018-yin.pdf