Merge branch 'compute-diffs' into HEAD

This commit is contained in:
Nathan TeBlunthuis 2025-07-15 15:23:13 -07:00
commit 907a35323e
2 changed files with 4 additions and 0 deletions

View File

@@ -7,6 +7,7 @@ requires-python = ">=3.9"
dependencies = [ dependencies = [
"deltas>=0.7.0", "deltas>=0.7.0",
"mediawiki-utilities>=0.4.18", "mediawiki-utilities>=0.4.18",
"more-itertools>=10.7.0",
"mwpersistence>=0.2.4", "mwpersistence>=0.2.4",
"mwreverts>=0.1.5", "mwreverts>=0.1.5",
"mwtypes>=0.4.0", "mwtypes>=0.4.0",

View File

@@ -12,6 +12,7 @@ import sys
from collections import deque from collections import deque
from hashlib import sha1 from hashlib import sha1
from io import TextIOWrapper from io import TextIOWrapper
from more_itertools import chunked
from itertools import groupby from itertools import groupby
from subprocess import PIPE, Popen from subprocess import PIPE, Popen
from typing import IO, Any, Generator, TextIO, Union from typing import IO, Any, Generator, TextIO, Union
@@ -676,6 +677,8 @@ class WikiqParser:
writer = pq_writers[page.mwpage.namespace] writer = pq_writers[page.mwpage.namespace]
writer.write(pa.record_batch(row_buffer, schema=schema)) writer.write(pa.record_batch(row_buffer, schema=schema))
record_batch = pa.record_batch(output_buffer, schema=schema)
writer.write_batch(record_batch)
page_count += 1 page_count += 1
print( print(