From 7a61fd38d95813c4babb52b2f25eec17d7e6220b Mon Sep 17 00:00:00 2001
From: Nathan TeBlunthuis
Date: Thu, 7 Aug 2025 17:05:06 -0700
Subject: [PATCH] set num_context_lines; output counts.

---
Review notes (text in this section is ignored by `git am`):
- `p` is now incremented once per article yielded by
  stream_text_by_article_streaming(), and `i` once per text that is diffed.
  Previously `p` was incremented a single time before the loop (so the
  script always reported "1 pages") and `i` was incremented per article,
  so the "revisions" figure was really the article count.
- Whitespace of context lines was reconstructed; if the patch does not
  apply cleanly, try `git apply --ignore-whitespace`.
- The postimage hash on the `index` line is unchanged from the original
  patch and no longer matches the result; regenerate with
  `git format-patch` after applying.

 test/benchmark_wd2.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/test/benchmark_wd2.py b/test/benchmark_wd2.py
index afedecc..2c4aec0 100644
--- a/test/benchmark_wd2.py
+++ b/test/benchmark_wd2.py
@@ -5,7 +5,7 @@ import sys
 from typing import Generator, Tuple, List, Any, Dict
 import pywikidiff2
 
-differ = pywikidiff2.pywikidiff2()
+differ = pywikidiff2.pywikidiff2(num_context_lines=10)
 
 def _iter_row_dicts(parquet_file: pq.ParquetFile) -> Generator[Dict[str, Any], None, None]:
     for batch in parquet_file.iter_batches(columns=['articleid', 'text']):
@@ -27,12 +27,16 @@ def stream_text_by_article_streaming(file_path: str) -> Generator[Tuple[Any, Lis
         print(f"An error occurred: {e}")
 
 
-
-
+p = 0
+i = 0
 with open('/dev/null', 'w') as of:
     for _, texts in stream_text_by_article_streaming(sys.argv[1]):
+        p = p + 1
         last_text = ""
         for text in texts:
+            i = i + 1
             result = differ.inline_json_diff(last_text, text)
             print(result, file=of)
             last_text = text
+print(f"{p} pages")
+print(f"{i} revisions")