update deps and add edit_summary to wikiq output.

2025-05-29 18:02:14 -07:00 · 2025-05-29 18:02:14 -07:00 · bd22d26291
commit bd22d26291
parent 22d14dc5f2
2 changed files with 10 additions and 7 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,12 +14,11 @@ dependencies = [
    "yamlconf",
 ]
-# [tool.uv.sources]
+[tool.uv.sources]
-# yamlconf = { git = "https://github.com/groceryheist/yamlconf" }
+yamlconf = { git = "https://github.com/groceryheist/yamlconf" }
-# mwxml = { git = "https://github.com/groceryheist/python-mwxml" }
+mwxml = { git = "https://github.com/groceryheist/python-mwxml" }
 [dependency-groups]
 dev = [
-    "pandas>=2.1.0",
+    "pandas>=2.1.0",
    "pytest>=8.3.5",
 ]
--- a/8
+++ b/8
@@ -242,6 +242,7 @@ class RevDataBase:
    title: str
    namespace: int
    deleted: bool
    edit_summary: str
    text_chars: int = None
    revert: bool = None
    reverteds: list[int] = None
@@ -271,7 +272,8 @@ class RevDataBase:
        pa.field("sha1",pa.string()),
        pa.field("minor",pa.bool_()),
        pa.field("editor",pa.string()),
-        pa.field("anon",pa.bool_())
+        pa.field("anon",pa.bool_()),
        pa.field("edit_summary",pa.string())
    ]
    # pyarrow is a columnar format, so most of the work happens in the flush_parquet_buffer function
@@ -535,7 +537,8 @@ class WikiqParser:
                                             editorid="" if rev.deleted.user == True or rev.user.id is None else rev.user.id,
                                             title=page.title,
                                             deleted=rev.deleted.text,
-                                             namespace=namespace
+                                             namespace=namespace,
                                             edit_summary=rev.comment
                                             )
                rev_data = self.matchmake_revision(rev, rev_data)
@@ -717,6 +720,7 @@ def match_archive_suffix(input_filename):
 def open_input_file(input_filename, fandom_2020=False):
    cmd = match_archive_suffix(input_filename)
    if fandom_2020:
        cmd.append("*.xml")
    try: