updating with docker images and categorized citations
This commit is contained in:
parent
c7448f2fc2
commit
d8b9ca9dea
3098
053025_olmo_categorized_citations.csv
Normal file
3098
053025_olmo_categorized_citations.csv
Normal file
File diff suppressed because it is too large
Load Diff
12
containers/olmocr_container.def
Normal file
12
containers/olmocr_container.def
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
Bootstrap: docker
|
||||||
|
From: ubuntu:groovy
|
||||||
|
|
||||||
|
%post
|
||||||
|
export DEBIAN_FRONTEND=noninteractive
|
||||||
|
### installing requisite packages for olmocr
|
||||||
|
# Refresh the package index, then install the Python interpreter, the poppler
# PDF utilities and the Microsoft core fonts installer in one chained step
# (the install only runs if the index update succeeded).
# NOTE(review): no pip package is installed here, yet `pip install` is invoked
# later in %post -- confirm pip is provided some other way.
# NOTE(review): ttf-mscorefonts-installer normally asks for EULA acceptance;
# with DEBIAN_FRONTEND=noninteractive that prompt cannot be answered -- verify
# the fonts actually get installed during the build.
apt-get -y update && apt-get -y install python3.11 poppler-utils ttf-mscorefonts-installer
|
||||||
|
# Install the remaining font packages. The `install` subcommand is required
# (without it apt-get treats the first package name as an invalid operation),
# and -y keeps the container build non-interactive.
apt-get -y install msttcorefonts fonts-crosextra-caladea fonts-crosextra-carlito gsfonts lcdf-typetools
|
||||||
|
# Install olmocr with its GPU extra, resolving flashinfer wheels from the
# extra index page. The requirement is quoted so the shell cannot interpret
# [gpu] as a glob bracket expression and rewrite the argument.
pip install 'olmocr[gpu]' --find-links https://flashinfer.ai/whl/cu124/torch2.4/flashinfer/
|
||||||
|
|
||||||
|
|
||||||
|
|
20
containers/olmocr_docker.sh
Normal file
20
containers/olmocr_docker.sh
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
#SBATCH -A p32852
|
||||||
|
#SBATCH -p gengpu
|
||||||
|
#SBATCH --gres=gpu:a100:1
|
||||||
|
# One node is enough: the batch script itself runs only on the first allocated
# node and no srun step is launched, so a second node would sit idle.
#SBATCH --nodes=1
|
||||||
|
#SBATCH --ntasks-per-node=1
|
||||||
|
#SBATCH --time=24:00:00
|
||||||
|
#SBATCH --mem=64G
|
||||||
|
#SBATCH --cpus-per-task=4
|
||||||
|
#SBATCH --job-name=olmocr-pull-docker-img
|
||||||
|
#SBATCH --output=olmocr-pull.log
|
||||||
|
#SBATCH --mail-type=BEGIN,END,FAIL
|
||||||
|
#SBATCH --mail-user=gaughan@u.northwestern.edu
|
||||||
|
|
||||||
|
# using singularity to pull the allenai olmocr docker image
|
||||||
|
module load singularity
|
||||||
|
|
||||||
|
# Point Singularity's image/layer cache at the job's temporary directory.
export SINGULARITY_CACHEDIR="${TMPDIR}"
|
||||||
|
|
||||||
|
|
4
containers/singularity_pastebin.txt
Normal file
4
containers/singularity_pastebin.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
|
||||||
|
singularity pull docker://alleninstituteforai/olmocr:latest
|
||||||
|
|
9
p1-categorization.log
Normal file
9
p1-categorization.log
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
starting the job at: Fri May 30 21:46:19 CDT 2025
|
||||||
|
setting up the environment
|
||||||
|
running the p1 categorization script
|
||||||
|
cuda
|
||||||
|
NVIDIA A100-SXM4-80GB
|
||||||
|
_CudaDeviceProperties(name='NVIDIA A100-SXM4-80GB', major=8, minor=0, total_memory=81153MB, multi_processor_count=108, uuid=841be301-db75-9627-af0f-04d8965fd651, L2_cache_size=40MB)
|
||||||
|
Loading checkpoint shards: 0%| | 0/6 [00:00<?, ?it/s]
Loading checkpoint shards: 17%|█▋ | 1/6 [00:00<00:04, 1.04it/s]
Loading checkpoint shards: 33%|███▎ | 2/6 [00:02<00:04, 1.16s/it]
Loading checkpoint shards: 50%|█████ | 3/6 [00:03<00:03, 1.32s/it]
Loading checkpoint shards: 67%|██████▋ | 4/6 [00:05<00:02, 1.43s/it]
Loading checkpoint shards: 83%|████████▎ | 5/6 [00:06<00:01, 1.45s/it]
Loading checkpoint shards: 100%|██████████| 6/6 [00:07<00:00, 1.28s/it]
Loading checkpoint shards: 100%|██████████| 6/6 [00:07<00:00, 1.30s/it]
|
||||||
|
job finished, cleaning up
|
||||||
|
job pau at: Fri May 30 23:21:25 CDT 2025
|
Loading…
Reference in New Issue
Block a user