Update dependencies and add edit_summary to wikiq output.
This commit is contained in:
		
							parent
							
								
									22d14dc5f2
								
							
						
					
					
						commit
						bd22d26291
					
				| @ -14,12 +14,11 @@ dependencies = [ | ||||
|     "yamlconf", | ||||
| ] | ||||
| 
 | ||||
| # [tool.uv.sources] | ||||
| # yamlconf = { git = "https://github.com/groceryheist/yamlconf" } | ||||
| # mwxml = { git = "https://github.com/groceryheist/python-mwxml" } | ||||
| [tool.uv.sources] | ||||
| yamlconf = { git = "https://github.com/groceryheist/yamlconf" } | ||||
| mwxml = { git = "https://github.com/groceryheist/python-mwxml" } | ||||
| 
 | ||||
| [dependency-groups] | ||||
| dev = [ | ||||
|     "pandas>=2.1.0", | ||||
|     "pytest>=8.3.5", | ||||
|     "pandas>=2.1.0" | ||||
| ] | ||||
|  | ||||
							
								
								
									
										8
									
								
								wikiq
									
									
									
									
									
								
							
							
						
						
									
										8
									
								
								wikiq
									
									
									
									
									
								
							| @ -242,6 +242,7 @@ class RevDataBase: | ||||
|     title: str | ||||
|     namespace: int | ||||
|     deleted: bool | ||||
|     edit_summary: str | ||||
|     text_chars: int = None | ||||
|     revert: bool = None | ||||
|     reverteds: list[int] = None | ||||
| @ -271,7 +272,8 @@ class RevDataBase: | ||||
|         pa.field("sha1",pa.string()), | ||||
|         pa.field("minor",pa.bool_()), | ||||
|         pa.field("editor",pa.string()), | ||||
|         pa.field("anon",pa.bool_()) | ||||
|         pa.field("anon",pa.bool_()), | ||||
|         pa.field("edit_summary",pa.bool_()) | ||||
|     ] | ||||
| 
 | ||||
|     # pyarrow is a columnar format, so most of the work happens in the flush_parquet_buffer function | ||||
| @ -535,7 +537,8 @@ class WikiqParser: | ||||
|                                              editorid="" if rev.deleted.user == True or rev.user.id is None else rev.user.id, | ||||
|                                              title=page.title, | ||||
|                                              deleted=rev.deleted.text, | ||||
|                                              namespace=namespace | ||||
|                                              namespace=namespace, | ||||
|                                              edit_summary=rev.comment | ||||
|                                              ) | ||||
| 
 | ||||
|                 rev_data = self.matchmake_revision(rev, rev_data) | ||||
| @ -717,6 +720,7 @@ def match_archive_suffix(input_filename): | ||||
| 
 | ||||
| def open_input_file(input_filename, fandom_2020=False): | ||||
|     cmd = match_archive_suffix(input_filename) | ||||
| 
 | ||||
|     if fandom_2020: | ||||
|         cmd.append("*.xml") | ||||
|     try: | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user