Re-add special case where revert radius is zero: revert detection is disabled and the `revert` column is written as null
Signed-off-by: Will Beason <willbeason@gmail.com>
This commit is contained in:
parent
17c7f208ab
commit
89465b29f4
9
wikiq
9
wikiq
@@ -500,7 +500,7 @@ class WikiqParser:
|
|||||||
writer: pq.ParquetWriter | pc.CSVWriter
|
writer: pq.ParquetWriter | pc.CSVWriter
|
||||||
|
|
||||||
schema = table.schema()
|
schema = table.schema()
|
||||||
schema = schema.append(pa.field('revert', pa.bool_()))
|
schema = schema.append(pa.field('revert', pa.bool_(), nullable=True))
|
||||||
|
|
||||||
if self.output_parquet:
|
if self.output_parquet:
|
||||||
writer = pq.ParquetWriter(self.output_file, schema, flavor='spark')
|
writer = pq.ParquetWriter(self.output_file, schema, flavor='spark')
|
||||||
@@ -512,12 +512,9 @@ class WikiqParser:
|
|||||||
|
|
||||||
# skip namespaces not in the filter
|
# skip namespaces not in the filter
|
||||||
if self.namespace_filter is not None:
|
if self.namespace_filter is not None:
|
||||||
if page.namespace not in self.namespace_filter:
|
if page.mwpage.namespace not in self.namespace_filter:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# if page.namespace != 0:
|
|
||||||
# page.mwpage.title = ':'.join([dump.namespace_map[page.namespace], page.title])
|
|
||||||
|
|
||||||
# Disable detecting reverts if radius is 0.
|
# Disable detecting reverts if radius is 0.
|
||||||
if self.revert_radius > 0:
|
if self.revert_radius > 0:
|
||||||
reverts_column.rev_detector = mwreverts.Detector(radius=self.revert_radius)
|
reverts_column.rev_detector = mwreverts.Detector(radius=self.revert_radius)
|
||||||
@@ -552,7 +549,7 @@ class WikiqParser:
|
|||||||
|
|
||||||
is_revert_column: list[bool | None] = []
|
is_revert_column: list[bool | None] = []
|
||||||
for r, d in zip(buffer['reverteds'], buffer['deleted']):
|
for r, d in zip(buffer['reverteds'], buffer['deleted']):
|
||||||
if d:
|
if self.revert_radius == 0 or d:
|
||||||
is_revert_column.append(None)
|
is_revert_column.append(None)
|
||||||
else:
|
else:
|
||||||
is_revert_column.append(r is not None)
|
is_revert_column.append(r is not None)
|
||||||
|
Loading…
Reference in New Issue
Block a user