update deps and add edit_summary to wikiq output.
This commit is contained in:
		
							parent
							
								
									22d14dc5f2
								
							
						
					
					
						commit
						bd22d26291
					
				| @ -14,12 +14,11 @@ dependencies = [ | |||||||
|     "yamlconf", |     "yamlconf", | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
| # [tool.uv.sources] | [tool.uv.sources] | ||||||
| # yamlconf = { git = "https://github.com/groceryheist/yamlconf" } | yamlconf = { git = "https://github.com/groceryheist/yamlconf" } | ||||||
| # mwxml = { git = "https://github.com/groceryheist/python-mwxml" } | mwxml = { git = "https://github.com/groceryheist/python-mwxml" } | ||||||
| 
 | 
 | ||||||
| [dependency-groups] | [dependency-groups] | ||||||
| dev = [ | dev = [ | ||||||
|     "pandas>=2.1.0", |     "pandas>=2.1.0" | ||||||
|     "pytest>=8.3.5", |  | ||||||
| ] | ] | ||||||
|  | |||||||
							
								
								
									
										8
									
								
								wikiq
									
									
									
									
									
								
							
							
						
						
									
										8
									
								
								wikiq
									
									
									
									
									
								
							| @ -242,6 +242,7 @@ class RevDataBase: | |||||||
|     title: str |     title: str | ||||||
|     namespace: int |     namespace: int | ||||||
|     deleted: bool |     deleted: bool | ||||||
|  |     edit_summary: str | ||||||
|     text_chars: int = None |     text_chars: int = None | ||||||
|     revert: bool = None |     revert: bool = None | ||||||
|     reverteds: list[int] = None |     reverteds: list[int] = None | ||||||
| @ -271,7 +272,8 @@ class RevDataBase: | |||||||
|         pa.field("sha1",pa.string()), |         pa.field("sha1",pa.string()), | ||||||
|         pa.field("minor",pa.bool_()), |         pa.field("minor",pa.bool_()), | ||||||
|         pa.field("editor",pa.string()), |         pa.field("editor",pa.string()), | ||||||
|         pa.field("anon",pa.bool_()) |         pa.field("anon",pa.bool_()), | ||||||
|  |         pa.field("edit_summary",pa.string()) | ||||||
|     ] |     ] | ||||||
| 
 | 
 | ||||||
|     # pyarrow is a columnar format, so most of the work happens in the flush_parquet_buffer function |     # pyarrow is a columnar format, so most of the work happens in the flush_parquet_buffer function | ||||||
| @ -535,7 +537,8 @@ class WikiqParser: | |||||||
|                                              editorid="" if rev.deleted.user == True or rev.user.id is None else rev.user.id, |                                              editorid="" if rev.deleted.user == True or rev.user.id is None else rev.user.id, | ||||||
|                                              title=page.title, |                                              title=page.title, | ||||||
|                                              deleted=rev.deleted.text, |                                              deleted=rev.deleted.text, | ||||||
|                                              namespace=namespace |                                              namespace=namespace, | ||||||
|  |                                              edit_summary=rev.comment | ||||||
|                                              ) |                                              ) | ||||||
| 
 | 
 | ||||||
|                 rev_data = self.matchmake_revision(rev, rev_data) |                 rev_data = self.matchmake_revision(rev, rev_data) | ||||||
| @ -717,6 +720,7 @@ def match_archive_suffix(input_filename): | |||||||
| 
 | 
 | ||||||
| def open_input_file(input_filename, fandom_2020=False): | def open_input_file(input_filename, fandom_2020=False): | ||||||
|     cmd = match_archive_suffix(input_filename) |     cmd = match_archive_suffix(input_filename) | ||||||
|  | 
 | ||||||
|     if fandom_2020: |     if fandom_2020: | ||||||
|         cmd.append("*.xml") |         cmd.append("*.xml") | ||||||
|     try: |     try: | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user