Add docs and rename import pc -> pacsv
Signed-off-by: Will Beason <willbeason@gmail.com>
This commit is contained in:
6
wikiq
6
wikiq
@@ -32,7 +32,7 @@ from deltas import SequenceMatcher, SegmentMatcher
|
||||
|
||||
import pyarrow as pa
|
||||
import pyarrow.parquet as pq
|
||||
import pyarrow.csv as pc
|
||||
import pyarrow.csv as pacsv
|
||||
|
||||
|
||||
class PersistMethod:
|
||||
@@ -338,7 +338,7 @@ class WikiqParser:
|
||||
page_count = 0
|
||||
rev_count = 0
|
||||
|
||||
writer: Union[pq.ParquetWriter, pc.CSVWriter]
|
||||
writer: Union[pq.ParquetWriter, pacsv.CSVWriter]
|
||||
|
||||
schema = table.schema()
|
||||
schema = schema.append(pa.field('revert', pa.bool_(), nullable=True))
|
||||
@@ -362,7 +362,7 @@ class WikiqParser:
|
||||
if self.output_parquet:
|
||||
writer = pq.ParquetWriter(self.output_file, schema, flavor='spark')
|
||||
else:
|
||||
writer = pc.CSVWriter(self.output_file, schema, write_options=pc.WriteOptions(delimiter='\t'))
|
||||
writer = pacsv.CSVWriter(self.output_file, schema, write_options=pacsv.WriteOptions(delimiter='\t'))
|
||||
|
||||
regex_matches = {}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user