Add docs and rename import pc -> pacsv
Signed-off-by: Will Beason <willbeason@gmail.com>
This commit is contained in:
		
							parent
							
								
									586ae85c65
								
							
						
					
					
						commit
						11d2587471
					
				| @ -33,6 +33,10 @@ class RevisionField(ABC, Generic[T]): | |||||||
|         :param revisions: The set of revisions to compute the field from. |         :param revisions: The set of revisions to compute the field from. | ||||||
|         Revisions are passed in chronological order, so use revisions[-1] to |         Revisions are passed in chronological order, so use revisions[-1] to | ||||||
|         access the most recent revision in the set. |         access the most recent revision in the set. | ||||||
|  | 
 | ||||||
|  |         Implementations of extract should handle the case where revisions is | ||||||
|  |         either a single revision (collapse-user=FALSE) or a full edit session | ||||||
|  |         of contiguous edits by the same user (collapse-user=TRUE). | ||||||
|         """ |         """ | ||||||
|         pass |         pass | ||||||
| 
 | 
 | ||||||
|  | |||||||
							
								
								
									
										6
									
								
								wikiq
									
									
									
									
									
								
							
							
						
						
									
										6
									
								
								wikiq
									
									
									
									
									
								
							| @ -32,7 +32,7 @@ from deltas import SequenceMatcher, SegmentMatcher | |||||||
| 
 | 
 | ||||||
| import pyarrow as pa | import pyarrow as pa | ||||||
| import pyarrow.parquet as pq | import pyarrow.parquet as pq | ||||||
| import pyarrow.csv as pc | import pyarrow.csv as pacsv | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class PersistMethod: | class PersistMethod: | ||||||
| @ -338,7 +338,7 @@ class WikiqParser: | |||||||
|         page_count = 0 |         page_count = 0 | ||||||
|         rev_count = 0 |         rev_count = 0 | ||||||
| 
 | 
 | ||||||
|         writer: Union[pq.ParquetWriter, pc.CSVWriter] |         writer: Union[pq.ParquetWriter, pacsv.CSVWriter] | ||||||
| 
 | 
 | ||||||
|         schema = table.schema() |         schema = table.schema() | ||||||
|         schema = schema.append(pa.field('revert', pa.bool_(), nullable=True)) |         schema = schema.append(pa.field('revert', pa.bool_(), nullable=True)) | ||||||
| @ -362,7 +362,7 @@ class WikiqParser: | |||||||
|         if self.output_parquet: |         if self.output_parquet: | ||||||
|             writer = pq.ParquetWriter(self.output_file, schema, flavor='spark') |             writer = pq.ParquetWriter(self.output_file, schema, flavor='spark') | ||||||
|         else: |         else: | ||||||
|             writer = pc.CSVWriter(self.output_file, schema, write_options=pc.WriteOptions(delimiter='\t')) |             writer = pacsv.CSVWriter(self.output_file, schema, write_options=pacsv.WriteOptions(delimiter='\t')) | ||||||
| 
 | 
 | ||||||
|         regex_matches = {} |         regex_matches = {} | ||||||
| 
 | 
 | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user