Try reducing memory usage further.

This commit is contained in:
Nathan TeBlunthuis 2025-08-01 19:52:18 -07:00
parent 615d630ff0
commit 7528dc8b8e

View File

@@ -493,9 +493,9 @@ class WikiqParser:
num_context_lines=1000000, num_context_lines=1000000,
max_word_level_diff_complexity=-1, max_word_level_diff_complexity=-1,
moved_paragraph_detection_cutoff=-1, moved_paragraph_detection_cutoff=-1,
words_cache_capacity=2000, words_cache_capacity=1000,
diff_cache_capacity=2000, diff_cache_capacity=1000,
stats_cache_capacity=2000, stats_cache_capacity=1000,
) )
while not on_last_batch: while not on_last_batch:
@@ -875,7 +875,7 @@ def main():
parser.add_argument( parser.add_argument(
"--batch-size", "--batch-size",
dest="batch_size", dest="batch_size",
default=8000, default=4000,
type=int, type=int,
help="How many revisions to process in each batch. This ends up being the Parquet row group size", help="How many revisions to process in each batch. This ends up being the Parquet row group size",
) )