Merge branch 'parquet_support' of gitea:collective/mediawiki_dump_tools into parquet_support

This commit is contained in:
Nathan TeBlunthuis 2025-05-29 18:05:28 -07:00
commit f39ceefa4a
3 changed files with 6 additions and 40 deletions

6
.gitignore vendored
View File

@@ -4,9 +4,13 @@
*.xml.xz
*.swp
# Lockfiles
uv.lock
# JetBrains
/.idea
# Python build and test output
__pycache__/
test_output/
/test/test_output/
/test/test_output.parquet/

View File

@@ -9,6 +9,7 @@ dependencies = [
"mediawiki-utilities>=0.4.18",
"mwpersistence>=0.2.4",
"mwreverts>=0.1.5",
"mwtypes>=0.4.0",
"mwxml>=0.3.6",
"pyarrow>=20.0.0",
"yamlconf",

View File

@@ -1,39 +0,0 @@
attrs==25.3.0
certifi==2025.4.26
charset-normalizer==3.4.2
Cython==0.29.37
deltas==0.7.0
docopt==0.6.2
gnureadline==8.1.2
idna==3.10
jsonable==0.3.1
jsonschema==4.23.0
jsonschema-specifications==2025.4.1
mediawiki-utilities==0.4.18
mwcli==0.0.3
mwdiffs==0.0.2
mwpersistence==0.2.4
mwreverts==0.1.5
mwtypes==0.4.0
mwxml==0.3.6
pandas==2.2.3
para==0.0.8
parsimonious==0.10.0
pyarrow==20.0.0
pydub==0.25.1
PyMySQL==1.1.1
python-dateutil==2.9.0.post0
pytz==2025.2
PyYAML==5.4.1
referencing==0.36.2
regex==2024.11.6
requests==2.32.3
rpds-py==0.25.1
setuptools==80.8.0
six==1.17.0
stopit==1.1.2
typing_extensions==4.13.2
tzdata==2025.2
urllib3==2.4.0
wheel==0.45.1
yamlconf==0.2.6