Reduce memory usage a touch more.
This commit is contained in:
		
							parent
							
								
									9799919470
								
							
						
					
					
						commit
						2f853a879d
					
				| @ -493,9 +493,9 @@ class WikiqParser: | |||||||
|                     num_context_lines=1000000, |                     num_context_lines=1000000, | ||||||
|                     max_word_level_diff_complexity=-1, |                     max_word_level_diff_complexity=-1, | ||||||
|                     moved_paragraph_detection_cutoff=-1, |                     moved_paragraph_detection_cutoff=-1, | ||||||
|                     words_cache_capacity=1000, |                     words_cache_capacity=500, | ||||||
|                     diff_cache_capacity=1000, |                     diff_cache_capacity=500, | ||||||
|                     stats_cache_capacity=1000, |                     stats_cache_capacity=500, | ||||||
|                 ) |                 ) | ||||||
| 
 | 
 | ||||||
|             while not on_last_batch: |             while not on_last_batch: | ||||||
| @ -875,7 +875,7 @@ def main(): | |||||||
|     parser.add_argument( |     parser.add_argument( | ||||||
|         "--batch-size", |         "--batch-size", | ||||||
|         dest="batch_size", |         dest="batch_size", | ||||||
|         default=2000, |         default=1500, | ||||||
|         type=int, |         type=int, | ||||||
|         help="How many revisions to process in each batch. This ends up being the Parquet row group size", |         help="How many revisions to process in each batch. This ends up being the Parquet row group size", | ||||||
|     ) |     ) | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user