Prefix page titles with namespace names.
This commit is contained in:
17
wikiq
17
wikiq
@@ -3,6 +3,7 @@
|
||||
# original wikiq headers are: title articleid revid date_time anon
|
||||
# editor editor_id minor text_size text_entropy text_md5 reversion
|
||||
# additions_size deletions_size
|
||||
import pdb
|
||||
import argparse
|
||||
import sys
|
||||
import os, os.path
|
||||
@@ -32,11 +33,15 @@ class WikiqIterator():
|
||||
self.fh = fh
|
||||
self.collapse_user = collapse_user
|
||||
self.mwiterator = Dump.from_file(self.fh)
|
||||
self.namespace_map = { ns.id : ns.name for ns in
|
||||
self.mwiterator.site_info.namespaces }
|
||||
self.__pages = self.load_pages()
|
||||
|
||||
def load_pages(self):
|
||||
for page in self.mwiterator:
|
||||
yield WikiqPage(page, collapse_user=self.collapse_user)
|
||||
yield WikiqPage(page,
|
||||
namespace_map = self.namespace_map,
|
||||
collapse_user=self.collapse_user)
|
||||
|
||||
def __iter__(self):
|
||||
return self.__pages
|
||||
@@ -49,13 +54,14 @@ class WikiqPage():
|
||||
'restrictions', 'mwpage', '__revisions',
|
||||
'collapse_user')
|
||||
|
||||
def __init__(self, page, collapse_user=False):
|
||||
def __init__(self, page, namespace_map, collapse_user=False):
|
||||
self.id = page.id
|
||||
self.title = page.title
|
||||
self.namespace = page.namespace
|
||||
self.redirect = page.redirect
|
||||
if page.namespace != 0:
|
||||
self.title = ':'.join([namespace_map[page.namespace], page.title])
|
||||
else:
|
||||
self.title = page.title
|
||||
self.restrictions = page.restrictions
|
||||
|
||||
self.collapse_user = collapse_user
|
||||
self.mwpage = page
|
||||
self.__revisions = self.rev_list()
|
||||
@@ -111,7 +117,6 @@ class WikiqPage():
|
||||
|
||||
class WikiqParser():
|
||||
|
||||
|
||||
def __init__(self, input_file, output_file, collapse_user=False, persist=False, urlencode=False, persist_legacy=False):
|
||||
|
||||
self.input_file = input_file
|
||||
|
||||
Reference in New Issue
Block a user