1
0

trying to get olmocr to run, updated categorization values

This commit is contained in:
mgaughan 2025-06-02 11:27:23 -05:00
parent 208e5dfc91
commit f9e3075b2b
6 changed files with 30 additions and 16 deletions

20
containers/ocr_run.sh Normal file
View File

@ -0,0 +1,20 @@
#!/bin/bash
#SBATCH -A p32852
#SBATCH -p gengpu
#SBATCH --gres=gpu:a100:1
# NOTE(review): two nodes are requested, but nothing below uses srun, so the
# commands run on the first node only -- confirm whether --nodes=1 is intended.
#SBATCH --nodes=2
#SBATCH --ntasks-per-node=1
#SBATCH --time=24:00:00
#SBATCH --mem=64G
#SBATCH --cpus-per-task=4
# Job name and log file are specific to the run step so this job does not
# truncate the log written by the image-pull job (olmocr-pull.log).
#SBATCH --job-name=olmocr-run
#SBATCH --output=olmocr-run.log
#SBATCH --mail-type=BEGIN,END,FAIL
#SBATCH --mail-user=gaughan@u.northwestern.edu

# Runs the allenai olmocr container with Singularity on a GPU node.
#
# Environment:
#   OLMOCR_SIF  path to the Singularity image to run (default: olmocr.sif,
#               the file name produced by `singularity pull olmocr.sif ...`).

# Image to execute; resolved relative to the job's working directory unless
# an absolute path is supplied.
OLMOCR_SIF="${OLMOCR_SIF:-olmocr.sif}"

# Stop early with a clear message if the module system cannot provide
# singularity, instead of failing later with "command not found".
module load singularity || {
  echo "failed to load the singularity module" >&2
  exit 1
}

# A missing image would otherwise surface as an opaque singularity error.
if [[ ! -f "$OLMOCR_SIF" ]]; then
  echo "container image not found: $OLMOCR_SIF" >&2
  exit 1
fi

echo "running ocr container"
# --nv exposes the host NVIDIA driver/GPU (requested via --gres above) inside
# the container.
singularity run --nv "$OLMOCR_SIF"

View File

@ -0,0 +1,9 @@
INFO: Converting OCI blobs to SIF format
INFO: Starting build...
Getting image source signatures
Copying blob sha256:96d54c3075c9eeaed5561fd620828fd6bb5d80ecae7cb25f9ba5f7d88ea6e15c
Copying blob sha256:09d415c238d76b32a7ea4a6e6add9542db9a5641f7f183af70aae185d0709e58
Copying blob sha256:9fe6e2e61518cba6844870c03b285737daec35e62baf25ae7744629ed3a7b470
Copying blob sha256:41f16248e682693ff20b3032c1d5e5541cc87c5af898ae2ff9b24d2940e59100
Copying blob sha256:95d7b781703928cf3c4eece39d800cccb76728c375fedf51ecd83833fb25e458
Copying blob sha256:8f6c9048534734f4c873935293b7296225846ceb31c1a158400a67ea170dde7f

View File

@ -1,12 +0,0 @@
# Singularity definition file: builds an Ubuntu-based image with the system
# packages and Python package needed to run olmocr with GPU support.
# NOTE(review): ubuntu:groovy (20.10) is an end-of-life release; confirm that
# its apt repositories are still reachable and that they provide python3.11.
Bootstrap: docker
From: ubuntu:groovy
%post
    # Suppress interactive prompts from apt/debconf during the build.
    export DEBIAN_FRONTEND=noninteractive
    ### installing requisite packages for olmocr
    # All packages go through a single `install` invocation; the font packages
    # were previously passed to apt-get without the `install` subcommand (and
    # without -y), which apt-get rejects as an invalid operation.
    apt-get -y update && apt-get -y install \
        python3.11 poppler-utils ttf-mscorefonts-installer \
        msttcorefonts fonts-crosextra-caladea fonts-crosextra-carlito \
        gsfonts lcdf-typetools
    # NOTE(review): none of the packages above obviously provides `pip`;
    # verify it is available in the base image or add it explicitly.
    # The requirement is quoted so the shell does not treat [gpu] as a glob.
    pip install 'olmocr[gpu]' --find-links https://flashinfer.ai/whl/cu124/torch2.4/flashinfer/

View File

@ -17,4 +17,5 @@ module load singularity
# Point SINGULARITY_CACHEDIR at $TMPDIR so the pull below uses that location
# for its cache (presumably to keep large layer blobs out of the home
# directory -- confirm against the cluster's quota policy).
export SINGULARITY_CACHEDIR=$TMPDIR
# Fetch the olmocr Docker image and write it to olmocr.sif in the current
# working directory.
singularity pull olmocr.sif docker://alleninstituteforai/olmocr:latest

View File

@ -1,4 +0,0 @@
# Pull the olmocr Docker image; no output file is named here, so Singularity
# chooses the image file name itself.
singularity pull docker://alleninstituteforai/olmocr:latest

Binary file not shown.