a number of small updates and fixes
- fix regex for filename/filetype matches - unload all files, not just ones that end with xml, in 7z archives - fix bug that broke stdout - minor cosmetic fixes - updated mediawiki-utilities submodule to latest version
This commit is contained in:
parent
3f9da40747
commit
ba886ecf4c
@ -1 +1 @@
|
|||||||
Subproject commit beba46e3eee8e0582cc3a5515dfa658ffbd18f9d
|
Subproject commit f7329417ebb2f03d1e9b8a626236a3c0ce65c814
|
27
wikiq
27
wikiq
@ -272,11 +272,11 @@ class WikiqParser():
|
|||||||
|
|
||||||
|
|
||||||
def open_input_file(input_filename):
|
def open_input_file(input_filename):
|
||||||
if re.match(r'.*\.7z', input_filename):
|
if re.match(r'.*\.7z$', input_filename):
|
||||||
cmd = ["7za", "x", "-so", input_filename, '*.xml']
|
cmd = ["7za", "x", "-so", input_filename, '*']
|
||||||
elif re.match(r'.*\.gz', input_filename):
|
elif re.match(r'.*\.gz$', input_filename):
|
||||||
cmd = ["zcat", input_filename]
|
cmd = ["zcat", input_filename]
|
||||||
elif re.match(r'.*\.bz2', input_filename):
|
elif re.match(r'.*\.bz2$', input_filename):
|
||||||
cmd = ["zcat", input_filename]
|
cmd = ["zcat", input_filename]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -322,24 +322,25 @@ if len(args.dumpfiles) > 0:
|
|||||||
for filename in args.dumpfiles:
|
for filename in args.dumpfiles:
|
||||||
input_file = open_input_file(filename)
|
input_file = open_input_file(filename)
|
||||||
|
|
||||||
# open file for output
|
# open directory for output
|
||||||
|
if args.output_dir:
|
||||||
|
output_dir = args.output_dir[0]
|
||||||
|
else:
|
||||||
|
output_dir = "."
|
||||||
|
|
||||||
|
print("Processing file: %s" % filename, file=sys.stderr)
|
||||||
|
|
||||||
if args.stdout:
|
if args.stdout:
|
||||||
output_file = sys.stdout
|
output_file = sys.stdout
|
||||||
else:
|
else:
|
||||||
if args.output_dir:
|
|
||||||
output_dir = args.output_dir[0]
|
|
||||||
else:
|
|
||||||
output_dir = "."
|
|
||||||
|
|
||||||
filename = os.path.join(output_dir, os.path.basename(filename))
|
filename = os.path.join(output_dir, os.path.basename(filename))
|
||||||
output_file = open_output_file(filename)
|
output_file = open_output_file(filename)
|
||||||
|
|
||||||
wikiq = WikiqParser(input_file, output_file,
|
wikiq = WikiqParser(input_file, output_file,
|
||||||
collapse_user=args.collapse_user,
|
collapse_user=args.collapse_user,
|
||||||
persist=args.persist,
|
persist=args.persist,
|
||||||
urlencode=args.urlencode)
|
urlencode=args.urlencode)
|
||||||
|
|
||||||
print("Processing file: %s" % filename, file=sys.stderr)
|
|
||||||
|
|
||||||
wikiq.process()
|
wikiq.process()
|
||||||
|
|
||||||
@ -348,7 +349,7 @@ if len(args.dumpfiles) > 0:
|
|||||||
output_file.close()
|
output_file.close()
|
||||||
else:
|
else:
|
||||||
wikiq = WikiqParser(sys.stdin, sys.stdout,
|
wikiq = WikiqParser(sys.stdin, sys.stdout,
|
||||||
collapse_user=args.collapse_user,
|
collapse_user=args.collapse_user,
|
||||||
persist=args.persist,
|
persist=args.persist,
|
||||||
urlencode=args.urlencode)
|
urlencode=args.urlencode)
|
||||||
wikiq.process()
|
wikiq.process()
|
||||||
|
Loading…
Reference in New Issue
Block a user