From 59fea1919e37ee69b0ff8873524eb5f14efcda20 Mon Sep 17 00:00:00 2001 From: Nathan TeBlunthuis Date: Sun, 1 Mar 2026 20:05:12 -0800 Subject: [PATCH] Add redirect_target column to wikiq output. Exposes page.redirect as a nullable string column so downstream pipelines can build redirect maps for link target resolution. --- src/wikiq/__init__.py | 3 ++- src/wikiq/tables.py | 8 +++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/wikiq/__init__.py b/src/wikiq/__init__.py index cd8992f..4c4e242 100755 --- a/src/wikiq/__init__.py +++ b/src/wikiq/__init__.py @@ -133,6 +133,7 @@ def build_table( tables.RevisionArticleId(), tables.RevisionPageTitle(), tables.RevisionNamespace(), + tables.RevisionRedirectTarget(), tables.RevisionDeleted(), tables.RevisionEditorId(), tables.RevisionEditSummary(), @@ -1004,7 +1005,7 @@ class WikiqParser: if rev.id == page_resume_revid: found_resume_point = True is_resume_page = False - print(f"Resuming output after revid {rev.id}", file=sys.stderr) + print(f"Resuming output after revid {rev.id}", file=sys.stderr, flush=True) continue rev_count += 1 diff --git a/src/wikiq/tables.py b/src/wikiq/tables.py index 23e8f57..715c5dd 100644 --- a/src/wikiq/tables.py +++ b/src/wikiq/tables.py @@ -1,4 +1,3 @@ -import sys from abc import abstractmethod, ABC from datetime import datetime, timezone from hashlib import sha1 @@ -144,6 +143,13 @@ class RevisionNamespace(RevisionField[int]): return page.namespace +class RevisionRedirectTarget(RevisionField[Union[str, None]]): + field = pa.field("redirect_target", pa.string(), nullable=True) + + def extract(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> Union[str, None]: + return page.redirect + + class RevisionSha1(RevisionField[str]): field = pa.field("sha1", pa.string())