update deps and add edit_summary to wikiq output.
This commit is contained in:
parent
22d14dc5f2
commit
bd22d26291
@ -14,12 +14,11 @@ dependencies = [
|
|||||||
"yamlconf",
|
"yamlconf",
|
||||||
]
|
]
|
||||||
|
|
||||||
# [tool.uv.sources]
|
[tool.uv.sources]
|
||||||
# yamlconf = { git = "https://github.com/groceryheist/yamlconf" }
|
yamlconf = { git = "https://github.com/groceryheist/yamlconf" }
|
||||||
# mwxml = { git = "https://github.com/groceryheist/python-mwxml" }
|
mwxml = { git = "https://github.com/groceryheist/python-mwxml" }
|
||||||
|
|
||||||
[dependency-groups]
|
[dependency-groups]
|
||||||
dev = [
|
dev = [
|
||||||
"pandas>=2.1.0",
|
"pandas>=2.1.0"
|
||||||
"pytest>=8.3.5",
|
|
||||||
]
|
]
|
||||||
|
8
wikiq
8
wikiq
@ -242,6 +242,7 @@ class RevDataBase:
|
|||||||
title: str
|
title: str
|
||||||
namespace: int
|
namespace: int
|
||||||
deleted: bool
|
deleted: bool
|
||||||
|
edit_summary: str
|
||||||
text_chars: int = None
|
text_chars: int = None
|
||||||
revert: bool = None
|
revert: bool = None
|
||||||
reverteds: list[int] = None
|
reverteds: list[int] = None
|
||||||
@ -271,7 +272,8 @@ class RevDataBase:
|
|||||||
pa.field("sha1",pa.string()),
|
pa.field("sha1",pa.string()),
|
||||||
pa.field("minor",pa.bool_()),
|
pa.field("minor",pa.bool_()),
|
||||||
pa.field("editor",pa.string()),
|
pa.field("editor",pa.string()),
|
||||||
pa.field("anon",pa.bool_())
|
pa.field("anon",pa.bool_()),
|
||||||
|
pa.field("edit_summary",pa.bool_())
|
||||||
]
|
]
|
||||||
|
|
||||||
# pyarrow is a columnar format, so most of the work happens in the flush_parquet_buffer function
|
# pyarrow is a columnar format, so most of the work happens in the flush_parquet_buffer function
|
||||||
@ -535,7 +537,8 @@ class WikiqParser:
|
|||||||
editorid="" if rev.deleted.user == True or rev.user.id is None else rev.user.id,
|
editorid="" if rev.deleted.user == True or rev.user.id is None else rev.user.id,
|
||||||
title=page.title,
|
title=page.title,
|
||||||
deleted=rev.deleted.text,
|
deleted=rev.deleted.text,
|
||||||
namespace=namespace
|
namespace=namespace,
|
||||||
|
edit_summary=rev.comment
|
||||||
)
|
)
|
||||||
|
|
||||||
rev_data = self.matchmake_revision(rev, rev_data)
|
rev_data = self.matchmake_revision(rev, rev_data)
|
||||||
@ -717,6 +720,7 @@ def match_archive_suffix(input_filename):
|
|||||||
|
|
||||||
def open_input_file(input_filename, fandom_2020=False):
|
def open_input_file(input_filename, fandom_2020=False):
|
||||||
cmd = match_archive_suffix(input_filename)
|
cmd = match_archive_suffix(input_filename)
|
||||||
|
|
||||||
if fandom_2020:
|
if fandom_2020:
|
||||||
cmd.append("*.xml")
|
cmd.append("*.xml")
|
||||||
try:
|
try:
|
||||||
|
Loading…
Reference in New Issue
Block a user