Re-add special case where revert radius is zero
Signed-off-by: Will Beason <willbeason@gmail.com>
This commit is contained in:
		
							parent
							
								
									17c7f208ab
								
							
						
					
					
						commit
						89465b29f4
					
				
							
								
								
									
										9
									
								
								wikiq
									
									
									
									
									
								
							
							
						
						
									
										9
									
								
								wikiq
									
									
									
									
									
								
							| @ -500,7 +500,7 @@ class WikiqParser: | |||||||
|         writer: pq.ParquetWriter | pc.CSVWriter |         writer: pq.ParquetWriter | pc.CSVWriter | ||||||
| 
 | 
 | ||||||
|         schema = table.schema() |         schema = table.schema() | ||||||
|         schema = schema.append(pa.field('revert', pa.bool_())) |         schema = schema.append(pa.field('revert', pa.bool_(), nullable=True)) | ||||||
| 
 | 
 | ||||||
|         if self.output_parquet: |         if self.output_parquet: | ||||||
|             writer = pq.ParquetWriter(self.output_file, schema, flavor='spark') |             writer = pq.ParquetWriter(self.output_file, schema, flavor='spark') | ||||||
| @ -512,12 +512,9 @@ class WikiqParser: | |||||||
| 
 | 
 | ||||||
|             # skip namespaces not in the filter |             # skip namespaces not in the filter | ||||||
|             if self.namespace_filter is not None: |             if self.namespace_filter is not None: | ||||||
|                 if page.namespace not in self.namespace_filter: |                 if page.mwpage.namespace not in self.namespace_filter: | ||||||
|                     continue |                     continue | ||||||
| 
 | 
 | ||||||
|             # if page.namespace != 0: |  | ||||||
|             #     page.mwpage.title = ':'.join([dump.namespace_map[page.namespace], page.title]) |  | ||||||
| 
 |  | ||||||
|             # Disable detecting reverts if radius is 0. |             # Disable detecting reverts if radius is 0. | ||||||
|             if self.revert_radius > 0: |             if self.revert_radius > 0: | ||||||
|                 reverts_column.rev_detector = mwreverts.Detector(radius=self.revert_radius) |                 reverts_column.rev_detector = mwreverts.Detector(radius=self.revert_radius) | ||||||
| @ -552,7 +549,7 @@ class WikiqParser: | |||||||
| 
 | 
 | ||||||
|             is_revert_column: list[bool | None] = [] |             is_revert_column: list[bool | None] = [] | ||||||
|             for r, d in zip(buffer['reverteds'], buffer['deleted']): |             for r, d in zip(buffer['reverteds'], buffer['deleted']): | ||||||
|                 if d: |                 if self.revert_radius == 0 or d: | ||||||
|                     is_revert_column.append(None) |                     is_revert_column.append(None) | ||||||
|                 else: |                 else: | ||||||
|                     is_revert_column.append(r is not None) |                     is_revert_column.append(r is not None) | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user