Prefix page titles with namespace names.
This commit is contained in:
Submodule Mediawiki-Utilities deleted from f7329417eb
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
17
wikiq
17
wikiq
@@ -3,6 +3,7 @@
|
|||||||
# original wikiq headers are: title articleid revid date_time anon
|
# original wikiq headers are: title articleid revid date_time anon
|
||||||
# editor editor_id minor text_size text_entropy text_md5 reversion
|
# editor editor_id minor text_size text_entropy text_md5 reversion
|
||||||
# additions_size deletions_size
|
# additions_size deletions_size
|
||||||
|
import pdb
|
||||||
import argparse
|
import argparse
|
||||||
import sys
|
import sys
|
||||||
import os, os.path
|
import os, os.path
|
||||||
@@ -32,11 +33,15 @@ class WikiqIterator():
|
|||||||
self.fh = fh
|
self.fh = fh
|
||||||
self.collapse_user = collapse_user
|
self.collapse_user = collapse_user
|
||||||
self.mwiterator = Dump.from_file(self.fh)
|
self.mwiterator = Dump.from_file(self.fh)
|
||||||
|
self.namespace_map = { ns.id : ns.name for ns in
|
||||||
|
self.mwiterator.site_info.namespaces }
|
||||||
self.__pages = self.load_pages()
|
self.__pages = self.load_pages()
|
||||||
|
|
||||||
def load_pages(self):
|
def load_pages(self):
|
||||||
for page in self.mwiterator:
|
for page in self.mwiterator:
|
||||||
yield WikiqPage(page, collapse_user=self.collapse_user)
|
yield WikiqPage(page,
|
||||||
|
namespace_map = self.namespace_map,
|
||||||
|
collapse_user=self.collapse_user)
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
return self.__pages
|
return self.__pages
|
||||||
@@ -49,13 +54,14 @@ class WikiqPage():
|
|||||||
'restrictions', 'mwpage', '__revisions',
|
'restrictions', 'mwpage', '__revisions',
|
||||||
'collapse_user')
|
'collapse_user')
|
||||||
|
|
||||||
def __init__(self, page, collapse_user=False):
|
def __init__(self, page, namespace_map, collapse_user=False):
|
||||||
self.id = page.id
|
self.id = page.id
|
||||||
self.title = page.title
|
|
||||||
self.namespace = page.namespace
|
self.namespace = page.namespace
|
||||||
self.redirect = page.redirect
|
if page.namespace != 0:
|
||||||
|
self.title = ':'.join([namespace_map[page.namespace], page.title])
|
||||||
|
else:
|
||||||
|
self.title = page.title
|
||||||
self.restrictions = page.restrictions
|
self.restrictions = page.restrictions
|
||||||
|
|
||||||
self.collapse_user = collapse_user
|
self.collapse_user = collapse_user
|
||||||
self.mwpage = page
|
self.mwpage = page
|
||||||
self.__revisions = self.rev_list()
|
self.__revisions = self.rev_list()
|
||||||
@@ -111,7 +117,6 @@ class WikiqPage():
|
|||||||
|
|
||||||
class WikiqParser():
|
class WikiqParser():
|
||||||
|
|
||||||
|
|
||||||
def __init__(self, input_file, output_file, collapse_user=False, persist=False, urlencode=False, persist_legacy=False):
|
def __init__(self, input_file, output_file, collapse_user=False, persist=False, urlencode=False, persist_legacy=False):
|
||||||
|
|
||||||
self.input_file = input_file
|
self.input_file = input_file
|
||||||
|
|||||||
Reference in New Issue
Block a user