Add docs and rename the pyarrow.csv import alias pc -> pacsv
Signed-off-by: Will Beason <willbeason@gmail.com>
This commit is contained in:
		
							parent
							
								
									586ae85c65
								
							
						
					
					
						commit
						11d2587471
					
				| @ -33,6 +33,10 @@ class RevisionField(ABC, Generic[T]): | ||||
|         :param revisions: The set of revisions to compute the field from. | ||||
|         Revisions are passed in chronological order, so use revisions[-1] to | ||||
|         access the most recent revision in the set. | ||||
| 
 | ||||
|         Implementations of extract should handle both cases: revisions holds | ||||
|         a single revision (collapse-user=FALSE), or a full edit session of | ||||
|         contiguous edits by the same user (collapse-user=TRUE). | ||||
|         """ | ||||
|         pass | ||||
| 
 | ||||
|  | ||||
							
								
								
									
										6
									
								
								wikiq
									
									
									
									
									
								
							
							
						
						
									
										6
									
								
								wikiq
									
									
									
									
									
								
							| @ -32,7 +32,7 @@ from deltas import SequenceMatcher, SegmentMatcher | ||||
| 
 | ||||
| import pyarrow as pa | ||||
| import pyarrow.parquet as pq | ||||
| import pyarrow.csv as pc | ||||
| import pyarrow.csv as pacsv | ||||
| 
 | ||||
| 
 | ||||
| class PersistMethod: | ||||
| @ -338,7 +338,7 @@ class WikiqParser: | ||||
|         page_count = 0 | ||||
|         rev_count = 0 | ||||
| 
 | ||||
|         writer: Union[pq.ParquetWriter, pc.CSVWriter] | ||||
|         writer: Union[pq.ParquetWriter, pacsv.CSVWriter] | ||||
| 
 | ||||
|         schema = table.schema() | ||||
|         schema = schema.append(pa.field('revert', pa.bool_(), nullable=True)) | ||||
| @ -362,7 +362,7 @@ class WikiqParser: | ||||
|         if self.output_parquet: | ||||
|             writer = pq.ParquetWriter(self.output_file, schema, flavor='spark') | ||||
|         else: | ||||
|             writer = pc.CSVWriter(self.output_file, schema, write_options=pc.WriteOptions(delimiter='\t')) | ||||
|             writer = pacsv.CSVWriter(self.output_file, schema, write_options=pacsv.WriteOptions(delimiter='\t')) | ||||
| 
 | ||||
|         regex_matches = {} | ||||
| 
 | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user