Try reducing memory usage further: halve the cache capacities and the default batch size.
This commit is contained in:
		
							parent
							
								
									615d630ff0
								
							
						
					
					
						commit
						7528dc8b8e
					
				| @ -493,9 +493,9 @@ class WikiqParser: | |||||||
|                     num_context_lines=1000000, |                     num_context_lines=1000000, | ||||||
|                     max_word_level_diff_complexity=-1, |                     max_word_level_diff_complexity=-1, | ||||||
|                     moved_paragraph_detection_cutoff=-1, |                     moved_paragraph_detection_cutoff=-1, | ||||||
|                     words_cache_capacity=2000, |                     words_cache_capacity=1000, | ||||||
|                     diff_cache_capacity=2000, |                     diff_cache_capacity=1000, | ||||||
|                     stats_cache_capacity=2000, |                     stats_cache_capacity=1000, | ||||||
|                 ) |                 ) | ||||||
| 
 | 
 | ||||||
|             while not on_last_batch: |             while not on_last_batch: | ||||||
| @ -875,7 +875,7 @@ def main(): | |||||||
|     parser.add_argument( |     parser.add_argument( | ||||||
|         "--batch-size", |         "--batch-size", | ||||||
|         dest="batch_size", |         dest="batch_size", | ||||||
|         default=8000, |         default=4000, | ||||||
|         type=int, |         type=int, | ||||||
|         help="How many revisions to process in each batch. This ends up being the Parquet row group size", |         help="How many revisions to process in each batch. This ends up being the Parquet row group size", | ||||||
|     ) |     ) | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user