diff --git a/.gitignore b/.gitignore index 1e1f74f..c90a397 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,7 @@ # JetBrains /.idea + +# Python build and test output +__pycache__/ +test_output/ diff --git a/README.rst b/README.rst index b9e5f0a..761a9b3 100644 --- a/README.rst +++ b/README.rst @@ -12,6 +12,14 @@ submodule like:: Wikimedia dumps are usually in a compressed format such as 7z (most common), gz, or bz2. Wikiq uses your computer's compression software to read these files. Therefore wikiq depends on `7za`, `gzcat`, and `zcat`. +Dependencies +---------------- +These non-Python dependencies must be installed on your system for wikiq and its +associated tests to work. + +- 7zip +- ffmpeg + TODO: _______________ diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..a41306b --- /dev/null +++ b/requirements.txt @@ -0,0 +1,42 @@ +apeek==0.1.1 +attrs==25.3.0 +certifi==2025.4.26 +charset-normalizer==3.4.2 +Cython==0.29.37 +deltas==0.7.0 +docopt==0.6.2 +gnureadline==8.1.2 +idna==3.10 +jsonable==0.3.1 +jsonschema==4.23.0 +jsonschema-specifications==2025.4.1 +mediawiki-utilities==0.4.18 +mw==0.4.0 +mwcli==0.0.3 +mwdiffs==0.0.2 +mwpersistence==0.2.4 +mwreverts==0.1.5 +mwtypes==0.4.0 +mwxml==0.3.6 +numpy==1.26.4 +pandas==2.2.3 +para==0.0.8 +parsimonious==0.10.0 +pyarrow==20.0.0 +pydub==0.25.1 +PyMySQL==1.1.1 +python-dateutil==2.9.0.post0 +pytz==2025.2 +PyYAML==5.4.1 +referencing==0.36.2 +regex==2024.11.6 +requests==2.32.3 +rpds-py==0.25.1 +setuptools==80.8.0 +six==1.17.0 +stopit==1.1.2 +typing_extensions==4.13.2 +tzdata==2025.2 +urllib3==2.4.0 +wheel==0.45.1 +yamlconf==0.2.6 diff --git a/test/Wikiq_Unit_Test.py b/test/Wikiq_Unit_Test.py index 0a90c6c..7f4df39 100644 --- a/test/Wikiq_Unit_Test.py +++ b/test/Wikiq_Unit_Test.py @@ -5,11 +5,14 @@ from shutil import copyfile import pandas as pd from pandas.testing import assert_frame_equal from io import StringIO +import tracemalloc + +tracemalloc.start() # with / without pwr DONE # with / without url encode DONE # with / without collapse user DONE -# with output to sdtout DONE +# with output to stdout DONE # note that the persistence radius is 7 by default # reading various file formats including # 7z, gz, bz2, xml DONE