From 89465b29f40dbc5a4891dc5539b5ea2b7ad18ecb Mon Sep 17 00:00:00 2001
From: Will Beason
Date: Tue, 3 Jun 2025 15:18:21 -0500
Subject: [PATCH] Re-add special case where revert radius is zero

Signed-off-by: Will Beason
---
 wikiq | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/wikiq b/wikiq
index 744e435..3705b5e 100755
--- a/wikiq
+++ b/wikiq
@@ -500,7 +500,7 @@ class WikiqParser:
 
         writer: pq.ParquetWriter | pc.CSVWriter
         schema = table.schema()
-        schema = schema.append(pa.field('revert', pa.bool_()))
+        schema = schema.append(pa.field('revert', pa.bool_(), nullable=True))
 
         if self.output_parquet:
             writer = pq.ParquetWriter(self.output_file, schema, flavor='spark')
@@ -512,12 +512,9 @@ class WikiqParser:
 
             # skip namespaces not in the filter
             if self.namespace_filter is not None:
-                if page.namespace not in self.namespace_filter:
+                if page.mwpage.namespace not in self.namespace_filter:
                     continue
 
-            # if page.namespace != 0:
-            #     page.mwpage.title = ':'.join([dump.namespace_map[page.namespace], page.title])
-
             # Disable detecting reverts if radius is 0.
             if self.revert_radius > 0:
                 reverts_column.rev_detector = mwreverts.Detector(radius=self.revert_radius)
@@ -552,7 +549,7 @@ class WikiqParser:
 
                 is_revert_column: list[bool | None] = []
                 for r, d in zip(buffer['reverteds'], buffer['deleted']):
-                    if d:
+                    if self.revert_radius == 0 or d:
                         is_revert_column.append(None)
                     else:
                         is_revert_column.append(r is not None)