Add docs and rename import pc -> pacsv

Signed-off-by: Will Beason <willbeason@gmail.com>
This commit is contained in:
Will Beason
2025-06-17 11:46:16 -05:00
parent 586ae85c65
commit 11d2587471
2 changed files with 7 additions and 3 deletions

6
wikiq
View File

@@ -32,7 +32,7 @@ from deltas import SequenceMatcher, SegmentMatcher
import pyarrow as pa
import pyarrow.parquet as pq
-import pyarrow.csv as pc
+import pyarrow.csv as pacsv
class PersistMethod:
@@ -338,7 +338,7 @@ class WikiqParser:
page_count = 0
rev_count = 0
-writer: Union[pq.ParquetWriter, pc.CSVWriter]
+writer: Union[pq.ParquetWriter, pacsv.CSVWriter]
schema = table.schema()
schema = schema.append(pa.field('revert', pa.bool_(), nullable=True))
@@ -362,7 +362,7 @@ class WikiqParser:
if self.output_parquet:
writer = pq.ParquetWriter(self.output_file, schema, flavor='spark')
else:
-writer = pc.CSVWriter(self.output_file, schema, write_options=pc.WriteOptions(delimiter='\t'))
+writer = pacsv.CSVWriter(self.output_file, schema, write_options=pacsv.WriteOptions(delimiter='\t'))
regex_matches = {}