From 615d630ff0d5c413b46a508edac2c3b307bebaa2 Mon Sep 17 00:00:00 2001
From: Nathan TeBlunthuis
Date: Fri, 1 Aug 2025 19:45:21 -0700
Subject: [PATCH] reduce memory usage.

---
 src/wikiq/__init__.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/wikiq/__init__.py b/src/wikiq/__init__.py
index 31db834..e9dcc33 100755
--- a/src/wikiq/__init__.py
+++ b/src/wikiq/__init__.py
@@ -493,9 +493,9 @@ class WikiqParser:
             num_context_lines=1000000,
             max_word_level_diff_complexity=-1,
             moved_paragraph_detection_cutoff=-1,
-            words_cache_capacity=5000,
-            diff_cache_capacity=5000,
-            stats_cache_capacity=5000,
+            words_cache_capacity=2000,
+            diff_cache_capacity=2000,
+            stats_cache_capacity=2000,
         )

         while not on_last_batch:
@@ -875,7 +875,7 @@ def main():
     parser.add_argument(
         "--batch-size",
         dest="batch_size",
-        default=16000,
+        default=8000,
         type=int,
         help="How many revisions to process in each batch. This ends up being the Parquet row group size",
     )