Prefix page titles with namespace names.
This commit is contained in:
parent
dba793c6ac
commit
bf396ad366
@@ -1 +0,0 @@
|
||||
Subproject commit f7329417ebb2f03d1e9b8a626236a3c0ce65c814
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
17
wikiq
17
wikiq
@@ -3,6 +3,7 @@
|
||||
# original wikiq headers are: title articleid revid date_time anon
|
||||
# editor editor_id minor text_size text_entropy text_md5 reversion
|
||||
# additions_size deletions_size
|
||||
import pdb
|
||||
import argparse
|
||||
import sys
|
||||
import os, os.path
|
||||
@@ -32,11 +33,15 @@ class WikiqIterator():
|
||||
self.fh = fh
|
||||
self.collapse_user = collapse_user
|
||||
self.mwiterator = Dump.from_file(self.fh)
|
||||
self.namespace_map = { ns.id : ns.name for ns in
|
||||
self.mwiterator.site_info.namespaces }
|
||||
self.__pages = self.load_pages()
|
||||
|
||||
def load_pages(self):
|
||||
for page in self.mwiterator:
|
||||
yield WikiqPage(page, collapse_user=self.collapse_user)
|
||||
yield WikiqPage(page,
|
||||
namespace_map = self.namespace_map,
|
||||
collapse_user=self.collapse_user)
|
||||
|
||||
def __iter__(self):
|
||||
return self.__pages
|
||||
@@ -49,13 +54,14 @@ class WikiqPage():
|
||||
'restrictions', 'mwpage', '__revisions',
|
||||
'collapse_user')
|
||||
|
||||
def __init__(self, page, collapse_user=False):
|
||||
def __init__(self, page, namespace_map, collapse_user=False):
|
||||
self.id = page.id
|
||||
self.title = page.title
|
||||
self.namespace = page.namespace
|
||||
self.redirect = page.redirect
|
||||
if page.namespace != 0:
|
||||
self.title = ':'.join([namespace_map[page.namespace], page.title])
|
||||
else:
|
||||
self.title = page.title
|
||||
self.restrictions = page.restrictions
|
||||
|
||||
self.collapse_user = collapse_user
|
||||
self.mwpage = page
|
||||
self.__revisions = self.rev_list()
|
||||
@@ -111,7 +117,6 @@ class WikiqPage():
|
||||
|
||||
class WikiqParser():
|
||||
|
||||
|
||||
def __init__(self, input_file, output_file, collapse_user=False, persist=False, urlencode=False, persist_legacy=False):
|
||||
|
||||
self.input_file = input_file
|
||||
|
Loading…
Reference in New Issue
Block a user