diff --git a/pyproject.toml b/pyproject.toml index b312298..60f2110 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,6 +7,7 @@ requires-python = ">=3.9" dependencies = [ "deltas>=0.7.0", "mediawiki-utilities>=0.4.18", + "more-itertools>=10.7.0", "mwpersistence>=0.2.4", "mwreverts>=0.1.5", "mwtypes>=0.4.0", diff --git a/src/wikiq/__init__.py b/src/wikiq/__init__.py index 52a68e8..6f0e508 100755 --- a/src/wikiq/__init__.py +++ b/src/wikiq/__init__.py @@ -12,6 +12,7 @@ import sys from collections import deque from hashlib import sha1 from io import TextIOWrapper +from more_itertools import chunked from itertools import groupby from subprocess import PIPE, Popen from typing import IO, Any, Generator, TextIO, Union @@ -676,6 +677,8 @@ class WikiqParser: writer = pq_writers[page.mwpage.namespace] writer.write(pa.record_batch(row_buffer, schema=schema)) + record_batch = pa.record_batch(output_buffer, schema=schema) + writer.write_batch(record_batch) page_count += 1 print(