diff --git a/wikiq b/wikiq
index 3c1c904..a171cbf 100755
--- a/wikiq
+++ b/wikiq
@@ -8,14 +8,13 @@ import argparse
 import sys
 import os.path
 import re
-from datetime import datetime, timezone
 from io import TextIOWrapper
 from itertools import groupby
 
 from subprocess import Popen, PIPE
 from collections import deque
 from hashlib import sha1
-from typing import Any, IO, TextIO, Final, Generator
+from typing import Any, IO, TextIO, Generator
 
 import mwxml
 from mwxml import Dump
@@ -24,8 +23,6 @@ from deltas.tokenizers import wikitext_split
 import mwpersistence
 import mwreverts
 
-from pyarrow import Schema
-
 import tables
 from tables import RevisionTable
 
@@ -33,7 +30,6 @@ TO_ENCODE = ('title', 'editor')
 PERSISTENCE_RADIUS = 7
 from deltas import SequenceMatcher, SegmentMatcher
 
-import dataclasses as dc
 import pyarrow as pa
 import pyarrow.parquet as pq
 import pyarrow.csv as pc
@@ -193,93 +189,6 @@ class RegexPair(object):
         return temp_dict
 
 
-def pa_schema() -> pa.Schema:
-    fields: list[pa.Field] = [
-        pa.field("revid", pa.int64()),
-        pa.field("date_time", pa.timestamp('s')),
-        pa.field("articleid", pa.int64()),
-        pa.field("editorid", pa.int64(), nullable=True),
-        pa.field("title", pa.string()),
-        pa.field("namespace", pa.int32()),
-        pa.field("deleted", pa.bool_()),
-        pa.field("text_chars", pa.int32()),
-        pa.field("comment_chars", pa.int32()),
-        pa.field("revert", pa.bool_(), nullable=True),
-        # reverteds is a string which contains a comma-separated list of reverted revision ids.
-        pa.field("reverteds", pa.string(), nullable=True),
-        pa.field("sha1", pa.string()),
-        pa.field("minor", pa.bool_()),
-        pa.field("editor", pa.string()),
-        pa.field("anon", pa.bool_())
-    ]
-    return pa.schema(fields)
-
-
-"""
-
-We used to use a dictionary to collect fields for the output. 
-Now we use dataclasses. Compared to a dictionary, this should help:
-- prevent some bugs
-- make it easier to output parquet data. 
-- use class attribute '.' syntax instead of dictionary syntax. 
-- improve support for tooling (autocomplete, type hints)
-- use type information to define formatting rules
-
-Depending on the parameters passed into Wikiq, the output schema can be different. 
-Therefore, we need to end up constructing a dataclass with the correct output schema. 
-It also needs to have the correct pyarrow schema so we can write parquet files.
-
-The RevDataBase type has all the fields that will be output no matter how wikiq is invoked.
-"""
-
-
-@dc.dataclass()
-class Revision:
-    revid: int
-    date_time: datetime
-    articleid: int
-    editorid: int
-    title: str
-    namespace: int
-    deleted: bool
-    text_chars: int | None = None
-    comment_chars: int | None = None
-    revert: bool | None = None
-    reverteds: str = None
-    sha1: str | None = None
-    minor: bool | None = None
-    editor: str | None = None
-    anon: bool | None = None
-
-    # defines pyarrow schema.
-    # each field in the data class needs an entry in this array.
-    # the names should match and be in the same order.
-    # this isn't a dataclass field since it doesn't have a type annotation
-    pa_schema_fields = [
-        pa.field("revid", pa.int64()),
-        pa.field("date_time", pa.timestamp('s')),
-        pa.field("articleid", pa.int64()),
-        pa.field("editorid", pa.int64(), nullable=True),
-        pa.field("title", pa.string()),
-        pa.field("namespace", pa.int32()),
-        pa.field("deleted", pa.bool_()),
-        pa.field("text_chars", pa.int32()),
-        # pa.field("comment_chars", pa.int32()),
-        pa.field("revert", pa.bool_(), nullable=True),
-        # reverteds is a string which contains a comma-separated list of reverted revision ids.
-        pa.field("reverteds", pa.string(), nullable=True),
-        pa.field("sha1", pa.string()),
-        pa.field("minor", pa.bool_()),
-        pa.field("editor", pa.string()),
-        pa.field("anon", pa.bool_())
-    ]
-
-    # pyarrow is a columnar format, so most of the work happens in the flush_parquet_buffer function
-    def to_pyarrow(self) -> pa.RecordBatch:
-        d = dc.asdict(self)
-        lists = [[d[field.name]] for field in self.pa_schema_fields]
-        return pa.record_batch(lists, schema=pa.schema(self.pa_schema_fields))
-
 class WikiqParser:
     def __init__(self,
                  input_file: TextIOWrapper | IO[Any] | IO[bytes],
@@ -311,7 +220,8 @@ class WikiqParser:
             self.namespace_filter = None
 
         self.regex_schemas = []
-        self.regex_revision_pairs: list[RegexPair] = self.make_matchmake_pairs(regex_match_revision, regex_revision_label)
+        self.regex_revision_pairs: list[RegexPair] = self.make_matchmake_pairs(regex_match_revision,
+                                                                               regex_revision_label)
         self.regex_comment_pairs: list[RegexPair] = self.make_matchmake_pairs(regex_match_comment, regex_comment_label)
 
         # here we initialize the variables we need for output.
@@ -512,9 +422,11 @@ class WikiqParser:
                 buffer['tokens_window'] = []
 
                 if self.persist == PersistMethod.sequence:
-                    state = mwpersistence.DiffState(SequenceMatcher(tokenizer=wikitext_split), revert_radius=PERSISTENCE_RADIUS)
+                    state = mwpersistence.DiffState(SequenceMatcher(tokenizer=wikitext_split),
+                                                    revert_radius=PERSISTENCE_RADIUS)
                 elif self.persist == PersistMethod.segment:
-                    state = mwpersistence.DiffState(SegmentMatcher(tokenizer=wikitext_split), revert_radius=PERSISTENCE_RADIUS)
+                    state = mwpersistence.DiffState(SegmentMatcher(tokenizer=wikitext_split),
+                                                    revert_radius=PERSISTENCE_RADIUS)
                 else:
                     from mw.lib import persistence
                     state = persistence.State()
@@ -551,7 +463,7 @@ class WikiqParser:
                     buffer['token_revs'].append(num_token_revs)
                     buffer['tokens_added'].append(num_tokens)
                     buffer['tokens_removed'].append(len(tokens_removed))
-                    buffer['tokens_window'].append(len(window) - (i+1))
+                    buffer['tokens_window'].append(len(window) - (i + 1))
 
             writer.write(pa.table(buffer, schema=schema))