diff --git a/.gitignore b/.gitignore index d5257ec..1ae46ba 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,9 @@ *.xml.xz *.swp +# Lockfiles +uv.lock + # JetBrains /.idea diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..eafb09f --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,26 @@ +[project] +name = "mediawiki-dump-tools" +version = "0.1.0" +description = "Tools for extracting tabular edit datasets from MediaWiki XML dumps" +readme = "README.md" +requires-python = ">=3.11" +dependencies = [ + "deltas>=0.7.0", + "mediawiki-utilities>=0.4.18", + "mwpersistence>=0.2.4", + "mwreverts>=0.1.5", + "mwtypes>=0.4.0", + "mwxml>=0.3.6", + "pyarrow>=20.0.0", + "yamlconf", +] + +[tool.uv.sources] +yamlconf = { git = "https://github.com/groceryheist/yamlconf" } +mwxml = { git = "https://github.com/groceryheist/python-mwxml" } + +[dependency-groups] +dev = [ + "pandas>=2.1.0", + "pytest>=8.3.5", +] diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 635dc23..0000000 --- a/requirements.txt +++ /dev/null @@ -1,40 +0,0 @@ -attrs==25.3.0 -certifi==2025.4.26 -charset-normalizer==3.4.2 -Cython==0.29.37 -deltas==0.7.0 -docopt==0.6.2 -gnureadline==8.1.2 -idna==3.10 -jsonable==0.3.1 -jsonschema==4.23.0 -jsonschema-specifications==2025.4.1 -mediawiki-utilities==0.4.18 -mwcli==0.0.3 -mwdiffs==0.0.2 -mwpersistence==0.2.4 -mwreverts==0.1.5 -mwtypes==0.4.0 -mwxml==0.3.6 -numpy==2.2.6 -pandas==2.2.3 -para==0.0.8 -parsimonious==0.10.0 -pyarrow==20.0.0 -pydub==0.25.1 -PyMySQL==1.1.1 -python-dateutil==2.9.0.post0 -pytz==2025.2 -PyYAML==5.4.1 -referencing==0.36.2 -regex==2024.11.6 -requests==2.32.3 -rpds-py==0.25.1 -setuptools==80.8.0 -six==1.17.0 -stopit==1.1.2 -typing_extensions==4.13.2 -tzdata==2025.2 -urllib3==2.4.0 -wheel==0.45.1 -yamlconf==0.2.6 diff --git a/test/Wikiq_Unit_Test.py b/test/Wikiq_Unit_Test.py index 9ae9da0..18d963d 100644 --- a/test/Wikiq_Unit_Test.py +++ b/test/Wikiq_Unit_Test.py @@ -1,4 +1,3 @@ -import math import unittest import os import subprocess @@ 
-7,12 +6,10 @@ from shutil import copyfile import numpy as np import pandas as pd from pandas import DataFrame -from pandas._testing import assert_series_equal -from pandas.testing import assert_frame_equal +from pandas.testing import assert_frame_equal, assert_series_equal from io import StringIO import tracemalloc from typing import Final -from datetime import datetime # Make references to files and wikiq relative to this file, not to the current working directory. TEST_DIR: Final[str] = os.path.dirname(os.path.realpath(__file__)) @@ -181,7 +178,7 @@ class WikiqTestCase(unittest.TestCase): tester = WikiqTester(SAILORMOON, "collapse-user", in_compression="7z") try: - tester.call_wikiq("--collapse-user") + tester.call_wikiq("--collapse-user", "--fandom-2020") except subprocess.CalledProcessError as exc: self.fail(exc.stderr.decode("utf8")) @@ -195,7 +192,7 @@ class WikiqTestCase(unittest.TestCase): tester = WikiqTester(SAILORMOON, "persistence_segment", in_compression="7z") try: - tester.call_wikiq("--persistence segment") + tester.call_wikiq("--persistence segment", "--fandom-2020") except subprocess.CalledProcessError as exc: self.fail(exc.stderr.decode("utf8")) @@ -209,7 +206,7 @@ class WikiqTestCase(unittest.TestCase): tester = WikiqTester(SAILORMOON, "persistence_legacy", in_compression="7z") try: - tester.call_wikiq("--persistence legacy") + tester.call_wikiq("--persistence legacy", "--fandom-2020") except subprocess.CalledProcessError as exc: self.fail(exc.stderr.decode("utf8")) @@ -223,7 +220,7 @@ class WikiqTestCase(unittest.TestCase): tester = WikiqTester(SAILORMOON, "persistence", in_compression="7z") try: - tester.call_wikiq("--persistence") + tester.call_wikiq("--persistence", "--fandom-2020") except subprocess.CalledProcessError as exc: self.fail(exc.stderr.decode("utf8")) @@ -239,7 +236,7 @@ class WikiqTestCase(unittest.TestCase): tester = WikiqTester(SAILORMOON, "url-encode", in_compression="7z") try: - tester.call_wikiq("--url-encode") + 
tester.call_wikiq("--url-encode", "--fandom-2020") except subprocess.CalledProcessError as exc: self.fail(exc.stderr.decode("utf8")) @@ -266,7 +263,7 @@ class WikiqTestCase(unittest.TestCase): tester = WikiqTester(wiki=SAILORMOON, case_name="noargs", in_compression="7z") try: - outs = tester.call_wikiq("--stdout", out=False).decode("utf8") + outs = tester.call_wikiq( "--stdout", "--fandom-2020", out=False).decode("utf8") except subprocess.CalledProcessError as exc: self.fail(exc.stderr.decode("utf8")) diff --git a/wikiq b/wikiq index ffd4183..1ed2d8e 100755 --- a/wikiq +++ b/wikiq @@ -266,13 +266,13 @@ class RevDataBase: pa.field("revid", pa.int64()), pa.field("date_time", pa.timestamp('ms')), pa.field("articleid", pa.int64()), - pa.field("editorid", pa.int64()), + pa.field("editorid", pa.int64(), nullable=True), pa.field("title", pa.string()), pa.field("namespace", pa.int32()), pa.field("deleted", pa.bool_()), pa.field("text_chars", pa.int32()), - pa.field("revert", pa.bool_()), - pa.field("reverteds", pa.list_(pa.int64())), + pa.field("revert", pa.bool_(), nullable=True), + pa.field("reverteds", pa.list_(pa.int64()), nullable=True), pa.field("sha1", pa.string()), pa.field("minor", pa.bool_()), pa.field("editor", pa.string()), @@ -280,7 +280,7 @@ class RevDataBase: ] # pyarrow is a columnar format, so most of the work happens in the flush_parquet_buffer function - def to_pyarrow(self) -> tuple[Any, ...]: + def to_pyarrow(self): return dc.astuple(self) # logic to convert each field into the wikiq tsv format goes here. 
@@ -732,16 +732,22 @@ class WikiqParser: print(line, file=self.output_file) -def open_input_file(input_filename) -> TextIOWrapper | IO[Any] | IO[bytes]: +def match_archive_suffix(input_filename): if re.match(r'.*\.7z$', input_filename): - cmd = ["7za", "x", "-so", input_filename, "*.xml"] + cmd = ["7za", "x", "-so", input_filename] elif re.match(r'.*\.gz$', input_filename): cmd = ["zcat", input_filename] elif re.match(r'.*\.bz2$', input_filename): cmd = ["bzcat", "-dk", input_filename] else: raise ValueError("Unrecognized file type: %s" % input_filename) + return cmd + +def open_input_file(input_filename, fandom_2020=False): + cmd = match_archive_suffix(input_filename) + if fandom_2020: + cmd.append("*.xml") try: return Popen(cmd, stdout=PIPE).stdout except NameError: @@ -814,6 +820,10 @@ def main(): action='append', help="The label for the outputted column based on matching the regex in comments.") + parser.add_argument('--fandom-2020', dest="fandom_2020", + action='store_true', + help="Whether the archive is from the fandom 2020 dumps by Wikiteam. These dumps can have multiple .xml files in their archives.") + args = parser.parse_args() # set persistence method @@ -835,7 +845,7 @@ def main(): if len(args.dumpfiles) > 0: output_parquet = False for filename in args.dumpfiles: - input_file = open_input_file(filename) + input_file = open_input_file(filename, args.fandom_2020) # open directory for output if args.output_dir: