a number of small updates and fixes

- fix regex for filename/filetype matches
- extract all files from 7z archives, not just ones that end with xml
- fix bug that broke stdout
- minor cosmetic fixes
- updated mediawiki-utilities submodule to latest version
This commit is contained in:
2018-05-17 14:37:20 -07:00
parent 3f9da40747
commit ba886ecf4c
2 changed files with 15 additions and 14 deletions

27
wikiq
View File

@@ -272,11 +272,11 @@ class WikiqParser():
def open_input_file(input_filename): def open_input_file(input_filename):
if re.match(r'.*\.7z', input_filename): if re.match(r'.*\.7z$', input_filename):
cmd = ["7za", "x", "-so", input_filename, '*.xml'] cmd = ["7za", "x", "-so", input_filename, '*']
elif re.match(r'.*\.gz', input_filename): elif re.match(r'.*\.gz$', input_filename):
cmd = ["zcat", input_filename] cmd = ["zcat", input_filename]
elif re.match(r'.*\.bz2', input_filename): elif re.match(r'.*\.bz2$', input_filename):
cmd = ["zcat", input_filename] cmd = ["zcat", input_filename]
try: try:
@@ -322,24 +322,25 @@ if len(args.dumpfiles) > 0:
for filename in args.dumpfiles: for filename in args.dumpfiles:
input_file = open_input_file(filename) input_file = open_input_file(filename)
# open file for output # open directory for output
if args.output_dir:
output_dir = args.output_dir[0]
else:
output_dir = "."
print("Processing file: %s" % filename, file=sys.stderr)
if args.stdout: if args.stdout:
output_file = sys.stdout output_file = sys.stdout
else: else:
if args.output_dir:
output_dir = args.output_dir[0]
else:
output_dir = "."
filename = os.path.join(output_dir, os.path.basename(filename)) filename = os.path.join(output_dir, os.path.basename(filename))
output_file = open_output_file(filename) output_file = open_output_file(filename)
wikiq = WikiqParser(input_file, output_file, wikiq = WikiqParser(input_file, output_file,
collapse_user=args.collapse_user, collapse_user=args.collapse_user,
persist=args.persist, persist=args.persist,
urlencode=args.urlencode) urlencode=args.urlencode)
print("Processing file: %s" % filename, file=sys.stderr)
wikiq.process() wikiq.process()
@@ -348,7 +349,7 @@ if len(args.dumpfiles) > 0:
output_file.close() output_file.close()
else: else:
wikiq = WikiqParser(sys.stdin, sys.stdout, wikiq = WikiqParser(sys.stdin, sys.stdout,
collapse_user=args.collapse_user, collapse_user=args.collapse_user,
persist=args.persist, persist=args.persist,
urlencode=args.urlencode) urlencode=args.urlencode)
wikiq.process() wikiq.process()