Merge branch 'parquet_support' into test-parquet

This commit is contained in:
Will Beason 2025-06-17 12:20:19 -05:00
commit 4bbed4a196
3 changed files with 11 additions and 3 deletions

View File

@ -21,6 +21,5 @@ mwxml = { git = "https://github.com/groceryheist/python-mwxml" }
[dependency-groups]
dev = [
"pandas>=2.1.0",
"pytest>=8.3.5",
"pandas>=2.1.0"
]

View File

@ -104,6 +104,14 @@ class RevisionEditorId(RevisionField[Union[int, None]]):
return revision.user.id
class RevisionEditSummary(RevisionField[Union[str, None]]):
    """Revision field that exposes the comment of the last revision in a list."""

    # Nullable string column named "edit_summary".
    field = pa.field("edit_summary", pa.string(), nullable=True)

    def extract(self, page: mwtypes.Page, revisions: list[mwxml.Revision]) -> Union[str, None]:
        """Return the ``comment`` attribute of the last entry in ``revisions``.

        ``page`` is not read by this implementation.
        """
        return revisions[-1].comment
class RevisionIsAnon(RevisionField[Union[bool, None]]):
field = pa.field("anon", pa.bool_(), nullable=True)

3
wikiq
View File

@ -317,10 +317,11 @@ class WikiqParser:
tables.RevisionId(),
tables.RevisionTimestamp(),
tables.RevisionArticleId(),
tables.RevisionEditorId(),
tables.RevisionPageTitle(),
tables.RevisionNamespace(),
tables.RevisionDeleted(),
tables.RevisionEditorId(),
tables.RevisionEditSummary(),
tables.RevisionTextChars(),
reverts_column,
tables.RevisionSha1(),