Reduce memory usage a touch more.
This commit is contained in:
parent
9799919470
commit
2f853a879d
@ -493,9 +493,9 @@ class WikiqParser:
|
|||||||
num_context_lines=1000000,
|
num_context_lines=1000000,
|
||||||
max_word_level_diff_complexity=-1,
|
max_word_level_diff_complexity=-1,
|
||||||
moved_paragraph_detection_cutoff=-1,
|
moved_paragraph_detection_cutoff=-1,
|
||||||
words_cache_capacity=1000,
|
words_cache_capacity=500,
|
||||||
diff_cache_capacity=1000,
|
diff_cache_capacity=500,
|
||||||
stats_cache_capacity=1000,
|
stats_cache_capacity=500,
|
||||||
)
|
)
|
||||||
|
|
||||||
while not on_last_batch:
|
while not on_last_batch:
|
||||||
@ -875,7 +875,7 @@ def main():
|
|||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--batch-size",
|
"--batch-size",
|
||||||
dest="batch_size",
|
dest="batch_size",
|
||||||
default=2000,
|
default=1500,
|
||||||
type=int,
|
type=int,
|
||||||
help="How many revisions to process in each batch. This ends up being the Parquet row group size",
|
help="How many revisions to process in each batch. This ends up being the Parquet row group size",
|
||||||
)
|
)
|
||||||
|
Loading…
Reference in New Issue
Block a user