The Python environment runs inside a Singularity container that cannot exec the host's /usr/bin/zstd via subprocess. Replace the subprocess call for .zst files with the zstandard Python library, which is already a dependency. Other formats (7z, bz2, bz, xz, gz) still use subprocess as before. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
59 lines
1.7 KiB
Python
59 lines
1.7 KiB
Python
from subprocess import Popen, PIPE
|
|
import re
|
|
from collections import defaultdict
|
|
from os import path
|
|
import glob
|
|
import io
|
|
import zstandard
|
|
|
|
|
|
def find_dumps(dumpdir, base_pattern):
    """Yield one path per dump in *dumpdir*, choosing the preferred extension.

    Files matching ``base_pattern`` under ``dumpdir`` are grouped by their
    path with the final extension removed.  For each group a single path is
    yielded, picking the first extension present from the priority list
    ``.zst``, ``.xz``, ``.bz2``, ``.gz``.

    Args:
        dumpdir: Directory to search.
        base_pattern: Glob pattern (relative to ``dumpdir``) selecting files.

    Yields:
        The chosen file path (base name plus selected extension) per dump.
    """
    files = glob.glob(path.join(dumpdir, base_pattern))

    # build a dictionary of possible extensions for each dump
    dumpext = defaultdict(list)
    for fpath in files:
        fname, ext = path.splitext(fpath)
        dumpext[fname].append(ext)

    ext_priority = ['.zst', '.xz', '.bz2', '.gz']

    for base, exts in dumpext.items():
        chosen = next((ext for ext in ext_priority if ext in exts), None)
        if chosen is None:
            # None of the preferred extensions exist for this dump.  Indexing
            # an empty list here used to raise IndexError and abort the whole
            # generator; instead fall back to an extension that is actually
            # on disk (min() keeps the choice independent of glob order).
            chosen = min(exts)
        yield base + chosen
|
|
|
|
def open_fileset(files):
    """Yield every line from each file in *files*, one file after another.

    Each file name is printed before it is read, then opened through
    ``open_input_file`` and iterated line by line.

    Args:
        files: Iterable of input file paths.

    Yields:
        Lines exactly as produced by the stream ``open_input_file`` returns.
    """
    for fh in files:
        print(fh)
        lines = open_input_file(fh)
        try:
            yield from lines
        finally:
            # Release the file handle / pipe as soon as this file is done (or
            # the consumer stops early) instead of leaking one per input file.
            lines.close()
|
|
|
|
def open_input_file(input_filename):
    """Open *input_filename* for reading, decompressing based on its suffix.

    ``.zst`` files are decoded in-process with the ``zstandard`` library and
    returned as a UTF-8 text stream.  ``.7z``, ``.bz2``, ``.bz``, ``.xz`` and
    ``.gz`` files are piped through the matching external tool and the
    child's stdout pipe is returned (created without text mode, so it is a
    binary stream).  Any other name is opened directly in text mode.

    Args:
        input_filename: Path of the file to open.

    Returns:
        A readable, iterable file-like object.

    Raises:
        OSError: If the file cannot be opened or the external decompression
            tool cannot be started.
    """
    # .zst handled via the zstandard library to avoid subprocess/container issues
    if input_filename.endswith('.zst'):
        fh = open(input_filename, 'rb')
        dctx = zstandard.ZstdDecompressor()
        return io.TextIOWrapper(dctx.stream_reader(fh), encoding='utf-8')

    # Match on the real suffix only.  The previous unanchored patterns for
    # .bz/.xz/.gz matched anywhere in the name, so e.g. "notes.gz.txt" was
    # wrongly handed to zcat.
    if input_filename.endswith('.7z'):
        cmd = ["7za", "x", "-so", input_filename, '*']
    elif input_filename.endswith(('.bz2', '.bz')):
        cmd = ["bzcat", "-dk", input_filename]
    elif input_filename.endswith('.xz'):
        cmd = ["xzcat", '-dk', '-T 20', input_filename]
    elif input_filename.endswith('.gz'):
        cmd = ["zcat", input_filename]
    else:
        return open(input_filename, 'r')

    # cmd is always bound here, so the old "except NameError" fallback could
    # never trigger; a missing tool raises OSError, which propagates as before.
    return Popen(cmd, stdout=PIPE).stdout
|
|
|