Add redirect_target column to wikiq output.
Exposes page.redirect as a nullable string column so downstream pipelines can build redirect maps for link target resolution.
This commit is contained in:
@@ -133,6 +133,7 @@ def build_table(
|
||||
tables.RevisionArticleId(),
|
||||
tables.RevisionPageTitle(),
|
||||
tables.RevisionNamespace(),
|
||||
tables.RevisionRedirectTarget(),
|
||||
tables.RevisionDeleted(),
|
||||
tables.RevisionEditorId(),
|
||||
tables.RevisionEditSummary(),
|
||||
@@ -1004,7 +1005,7 @@ class WikiqParser:
|
||||
if rev.id == page_resume_revid:
|
||||
found_resume_point = True
|
||||
is_resume_page = False
|
||||
print(f"Resuming output after revid {rev.id}", file=sys.stderr)
|
||||
print(f"Resuming output after revid {rev.id}", file=sys.stderr, flush=True)
|
||||
continue
|
||||
|
||||
rev_count += 1
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import sys
|
||||
from abc import abstractmethod, ABC
|
||||
from datetime import datetime, timezone
|
||||
from hashlib import sha1
|
||||
@@ -144,6 +143,13 @@ class RevisionNamespace(RevisionField[int]):
|
||||
return page.namespace
|
||||
|
||||
|
||||
class RevisionRedirectTarget(RevisionField[Union[str, None]]):
    """Field that exposes ``page.redirect`` as the ``redirect_target`` column.

    The column is declared as a nullable string, so a page without a
    redirect value can be stored as null (presumably ``page.redirect`` is
    ``None`` for non-redirect pages -- confirm against the mwxml docs).
    """

    # Schema entry for the output column: string-typed, explicitly nullable.
    field = pa.field("redirect_target", pa.string(), nullable=True)

    def extract(
        self, page: mwtypes.Page, revisions: list[mwxml.Revision]
    ) -> Union[str, None]:
        """Return the redirect value recorded on ``page``.

        ``revisions`` is not read here; the value comes from the page alone.
        """
        redirect_target = page.redirect
        return redirect_target
|
||||
|
||||
|
||||
class RevisionSha1(RevisionField[str]):
|
||||
field = pa.field("sha1", pa.string())
|
||||
|
||||
|
||||
Reference in New Issue
Block a user