Update dependencies and add edit_summary to wikiq output.
This commit is contained in:
parent
22d14dc5f2
commit
bd22d26291
@ -14,12 +14,11 @@ dependencies = [
|
||||
"yamlconf",
|
||||
]
|
||||
|
||||
# [tool.uv.sources]
|
||||
# yamlconf = { git = "https://github.com/groceryheist/yamlconf" }
|
||||
# mwxml = { git = "https://github.com/groceryheist/python-mwxml" }
|
||||
[tool.uv.sources]
|
||||
yamlconf = { git = "https://github.com/groceryheist/yamlconf" }
|
||||
mwxml = { git = "https://github.com/groceryheist/python-mwxml" }
|
||||
|
||||
[dependency-groups]
|
||||
dev = [
|
||||
"pandas>=2.1.0",
|
||||
"pytest>=8.3.5",
|
||||
"pandas>=2.1.0"
|
||||
]
|
||||
|
8
wikiq
8
wikiq
@ -242,6 +242,7 @@ class RevDataBase:
|
||||
title: str
|
||||
namespace: int
|
||||
deleted: bool
|
||||
edit_summary: str
|
||||
text_chars: int = None
|
||||
revert: bool = None
|
||||
reverteds: list[int] = None
|
||||
@ -271,7 +272,8 @@ class RevDataBase:
|
||||
pa.field("sha1",pa.string()),
|
||||
pa.field("minor",pa.bool_()),
|
||||
pa.field("editor",pa.string()),
|
||||
pa.field("anon",pa.bool_())
|
||||
pa.field("anon",pa.bool_()),
|
||||
pa.field("edit_summary",pa.bool_())
|
||||
]
|
||||
|
||||
# pyarrow is a columnar format, so most of the work happens in the flush_parquet_buffer function
|
||||
@ -535,7 +537,8 @@ class WikiqParser:
|
||||
editorid="" if rev.deleted.user == True or rev.user.id is None else rev.user.id,
|
||||
title=page.title,
|
||||
deleted=rev.deleted.text,
|
||||
namespace=namespace
|
||||
namespace=namespace,
|
||||
edit_summary=rev.comment
|
||||
)
|
||||
|
||||
rev_data = self.matchmake_revision(rev, rev_data)
|
||||
@ -717,6 +720,7 @@ def match_archive_suffix(input_filename):
|
||||
|
||||
def open_input_file(input_filename, fandom_2020=False):
|
||||
cmd = match_archive_suffix(input_filename)
|
||||
|
||||
if fandom_2020:
|
||||
cmd.append("*.xml")
|
||||
try:
|
||||
|
Loading…
Reference in New Issue
Block a user