Merge branch 'compute-diffs' into HEAD

This commit is contained in:
Nathan TeBlunthuis
2025-07-15 15:23:13 -07:00
2 changed files with 4 additions and 0 deletions

View File

@@ -7,6 +7,7 @@ requires-python = ">=3.9"
dependencies = [ dependencies = [
"deltas>=0.7.0", "deltas>=0.7.0",
"mediawiki-utilities>=0.4.18", "mediawiki-utilities>=0.4.18",
"more-itertools>=10.7.0",
"mwpersistence>=0.2.4", "mwpersistence>=0.2.4",
"mwreverts>=0.1.5", "mwreverts>=0.1.5",
"mwtypes>=0.4.0", "mwtypes>=0.4.0",

View File

@@ -12,6 +12,7 @@ import sys
from collections import deque from collections import deque
from hashlib import sha1 from hashlib import sha1
from io import TextIOWrapper from io import TextIOWrapper
from more_itertools import chunked
from itertools import groupby from itertools import groupby
from subprocess import PIPE, Popen from subprocess import PIPE, Popen
from typing import IO, Any, Generator, TextIO, Union from typing import IO, Any, Generator, TextIO, Union
@@ -676,6 +677,8 @@ class WikiqParser:
writer = pq_writers[page.mwpage.namespace] writer = pq_writers[page.mwpage.namespace]
writer.write(pa.record_batch(row_buffer, schema=schema)) writer.write(pa.record_batch(row_buffer, schema=schema))
record_batch = pa.record_batch(output_buffer, schema=schema)
writer.write_batch(record_batch)
page_count += 1 page_count += 1
print( print(