Add docs and rename the pyarrow.csv import alias from pc to pacsv
Signed-off-by: Will Beason <willbeason@gmail.com>
This commit is contained in:
parent
586ae85c65
commit
11d2587471
@ -33,6 +33,10 @@ class RevisionField(ABC, Generic[T]):
|
||||
:param revisions: The set of revisions to compute the field from.
|
||||
Revisions are passed in chronological order, so use revisions[-1] to
|
||||
access the most recent revision in the set.
|
||||
|
||||
Implementations of extract should handle the case where revisions is
|
||||
either a single revision (collapse-user=FALSE) or a full edit session
|
||||
of contiguous edits by the same user (collapse-user=TRUE).
|
||||
"""
|
||||
pass
|
||||
|
||||
|
6
wikiq
6
wikiq
@ -32,7 +32,7 @@ from deltas import SequenceMatcher, SegmentMatcher
|
||||
|
||||
import pyarrow as pa
|
||||
import pyarrow.parquet as pq
|
||||
import pyarrow.csv as pc
|
||||
import pyarrow.csv as pacsv
|
||||
|
||||
|
||||
class PersistMethod:
|
||||
@ -338,7 +338,7 @@ class WikiqParser:
|
||||
page_count = 0
|
||||
rev_count = 0
|
||||
|
||||
writer: Union[pq.ParquetWriter, pc.CSVWriter]
|
||||
writer: Union[pq.ParquetWriter, pacsv.CSVWriter]
|
||||
|
||||
schema = table.schema()
|
||||
schema = schema.append(pa.field('revert', pa.bool_(), nullable=True))
|
||||
@ -362,7 +362,7 @@ class WikiqParser:
|
||||
if self.output_parquet:
|
||||
writer = pq.ParquetWriter(self.output_file, schema, flavor='spark')
|
||||
else:
|
||||
writer = pc.CSVWriter(self.output_file, schema, write_options=pc.WriteOptions(delimiter='\t'))
|
||||
writer = pacsv.CSVWriter(self.output_file, schema, write_options=pacsv.WriteOptions(delimiter='\t'))
|
||||
|
||||
regex_matches = {}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user