Add redirect_target column to wikiq output.
Exposes page.redirect as a nullable string column so downstream pipelines can build redirect maps for link target resolution.
This commit is contained in:
@@ -133,6 +133,7 @@ def build_table(
|
|||||||
tables.RevisionArticleId(),
|
tables.RevisionArticleId(),
|
||||||
tables.RevisionPageTitle(),
|
tables.RevisionPageTitle(),
|
||||||
tables.RevisionNamespace(),
|
tables.RevisionNamespace(),
|
||||||
|
tables.RevisionRedirectTarget(),
|
||||||
tables.RevisionDeleted(),
|
tables.RevisionDeleted(),
|
||||||
tables.RevisionEditorId(),
|
tables.RevisionEditorId(),
|
||||||
tables.RevisionEditSummary(),
|
tables.RevisionEditSummary(),
|
||||||
@@ -1004,7 +1005,7 @@ class WikiqParser:
|
|||||||
if rev.id == page_resume_revid:
|
if rev.id == page_resume_revid:
|
||||||
found_resume_point = True
|
found_resume_point = True
|
||||||
is_resume_page = False
|
is_resume_page = False
|
||||||
print(f"Resuming output after revid {rev.id}", file=sys.stderr)
|
print(f"Resuming output after revid {rev.id}", file=sys.stderr, flush=True)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
rev_count += 1
|
rev_count += 1
|
||||||
|
|||||||
@@ -1,4 +1,3 @@
|
|||||||
import sys
|
|
||||||
from abc import abstractmethod, ABC
|
from abc import abstractmethod, ABC
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from hashlib import sha1
|
from hashlib import sha1
|
||||||
@@ -144,6 +143,13 @@ class RevisionNamespace(RevisionField[int]):
|
|||||||
return page.namespace
|
return page.namespace
|
||||||
|
|
||||||
|
|
||||||
|
class RevisionRedirectTarget(RevisionField[Union[str, None]]):
    """Revision-table column exposing ``page.redirect`` as ``redirect_target``.

    The Arrow field is declared as a nullable string, so the column can carry
    a missing value (presumably for pages that are not redirects -- confirm
    against the dump parser's ``Page.redirect`` semantics).
    """

    # Nullable string column; the name is what appears in the output schema.
    field = pa.field("redirect_target", pa.string(), nullable=True)

    def extract(
        self,
        page: mwtypes.Page,
        revisions: list[mwxml.Revision],
    ) -> Union[str, None]:
        """Return the page's ``redirect`` attribute unchanged.

        ``revisions`` is accepted for interface compatibility and is not
        consulted here.
        """
        redirect_target = page.redirect
        return redirect_target
|
||||||
|
|
||||||
|
|
||||||
class RevisionSha1(RevisionField[str]):
|
class RevisionSha1(RevisionField[str]):
|
||||||
field = pa.field("sha1", pa.string())
|
field = pa.field("sha1", pa.string())
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user