a number of small updates and fixes

- fix regex for filename/filetype matches
- extract all files from 7z archives, not just ones that end with .xml
- fix bug that broke stdout
- minor cosmetic fixes
- updated mediawiki-utilities submodule to latest version
This commit is contained in:
Benjamin Mako Hill 2018-05-17 14:37:20 -07:00
parent 3f9da40747
commit ba886ecf4c
2 changed files with 15 additions and 14 deletions

@ -1 +1 @@
Subproject commit beba46e3eee8e0582cc3a5515dfa658ffbd18f9d
Subproject commit f7329417ebb2f03d1e9b8a626236a3c0ce65c814

27
wikiq
View File

@ -272,11 +272,11 @@ class WikiqParser():
def open_input_file(input_filename):
if re.match(r'.*\.7z', input_filename):
cmd = ["7za", "x", "-so", input_filename, '*.xml']
elif re.match(r'.*\.gz', input_filename):
if re.match(r'.*\.7z$', input_filename):
cmd = ["7za", "x", "-so", input_filename, '*']
elif re.match(r'.*\.gz$', input_filename):
cmd = ["zcat", input_filename]
elif re.match(r'.*\.bz2', input_filename):
elif re.match(r'.*\.bz2$', input_filename):
cmd = ["zcat", input_filename]
try:
@ -322,24 +322,25 @@ if len(args.dumpfiles) > 0:
for filename in args.dumpfiles:
input_file = open_input_file(filename)
# open file for output
# open directory for output
if args.output_dir:
output_dir = args.output_dir[0]
else:
output_dir = "."
print("Processing file: %s" % filename, file=sys.stderr)
if args.stdout:
output_file = sys.stdout
else:
if args.output_dir:
output_dir = args.output_dir[0]
else:
output_dir = "."
filename = os.path.join(output_dir, os.path.basename(filename))
output_file = open_output_file(filename)
wikiq = WikiqParser(input_file, output_file,
collapse_user=args.collapse_user,
collapse_user=args.collapse_user,
persist=args.persist,
urlencode=args.urlencode)
print("Processing file: %s" % filename, file=sys.stderr)
wikiq.process()
@ -348,7 +349,7 @@ if len(args.dumpfiles) > 0:
output_file.close()
else:
wikiq = WikiqParser(sys.stdin, sys.stdout,
collapse_user=args.collapse_user,
collapse_user=args.collapse_user,
persist=args.persist,
urlencode=args.urlencode)
wikiq.process()