import bz2
import glob
import gzip
import io
import lzma
import re
from collections import defaultdict
from os import path
from subprocess import PIPE, Popen

import zstandard


def find_dumps(dumpdir, base_pattern):
    """Yield one path per dump found in ``dumpdir``, preferring the best format.

    Files matching ``base_pattern`` (a glob pattern, joined onto ``dumpdir``)
    are grouped by their path with the final extension stripped.  For each
    group a single path is yielded, chosen by extension priority:
    ``.zst`` > ``.xz`` > ``.bz2`` > ``.gz``.

    A dump that exists only with extensions outside that list (for example
    ``.7z``, ``.bz`` or an uncompressed file) is yielded using the first
    extension that was found for it, instead of raising ``IndexError``.

    Args:
        dumpdir: Directory to search.
        base_pattern: Glob pattern for the dump file names.

    Yields:
        The selected file path for each distinct dump.
    """
    files = glob.glob(path.join(dumpdir, base_pattern))

    # build a dictionary of possible extensions for each dump
    dumpext = defaultdict(list)
    for fpath in files:
        fname, ext = path.splitext(fpath)
        dumpext[fname].append(ext)

    ext_priority = ['.zst', '.xz', '.bz2', '.gz']

    for base, exts in dumpext.items():
        # exts is never empty here: every key was created by an append above,
        # so exts[0] is a safe default when no prioritized extension exists.
        best = next((ext for ext in ext_priority if ext in exts), exts[0])
        yield base + best

def open_fileset(files):
    """Yield every line from each input file in ``files``, in order.

    Each file name is printed before it is read, then opened with
    ``open_input_file``.  The handle is closed once the file has been fully
    consumed, or when the generator is closed or fails part-way through, so
    that handles do not pile up over a long list of files.

    Args:
        files: Iterable of input file names.

    Yields:
        Lines exactly as produced by the handle ``open_input_file`` returns.
    """
    for filename in files:
        print(filename)
        lines = open_input_file(filename)
        try:
            for line in lines:
                yield line
        finally:
            # Release the file descriptor / pipe before moving on.
            lines.close()

def open_input_file(input_filename):
    """Open a possibly-compressed input file and return a readable handle.

    The format is chosen from the file name's suffix:

    * ``.zst``  -- decompressed in-process with ``zstandard``; text (UTF-8).
    * ``.7z``   -- piped through ``7za``; bytes.
    * ``.bz2`` / ``.bz`` -- piped through ``bzcat``; bytes.
    * ``.xz``   -- piped through ``xzcat``; bytes.
    * ``.gz``   -- piped through ``zcat``; bytes.
    * anything else -- opened directly in text mode.

    Only the end of the name is inspected, so a name such as
    ``notes.gz.txt`` is treated as a plain file.

    If the external decompressor cannot be started (e.g. the binary is not
    installed), the error is printed and the file is opened with the
    matching standard-library module in binary mode, so callers still get
    decompressed bytes just as they would from the pipe.  ``.7z`` has no
    standard-library equivalent, so the error is re-raised for it.

    Args:
        input_filename: Path of the file to open.

    Returns:
        A readable file-like object (text or bytes, as listed above).

    Raises:
        OSError: If the file cannot be opened, or ``7za`` cannot be started.
    """
    # .zst handled via the zstandard library to avoid subprocess/container issues
    if input_filename.endswith('.zst'):
        fh = open(input_filename, 'rb')
        dctx = zstandard.ZstdDecompressor()
        return io.TextIOWrapper(dctx.stream_reader(fh), encoding='utf-8')

    if input_filename.endswith('.7z'):
        cmd = ["7za", "x", "-so", input_filename, '*']
        fallback = None
    elif input_filename.endswith(('.bz2', '.bz')):
        cmd = ["bzcat", "-dk", input_filename]
        fallback = bz2.open
    elif input_filename.endswith('.xz'):
        cmd = ["xzcat", '-dk', '-T 20', input_filename]
        fallback = lzma.open
    elif input_filename.endswith('.gz'):
        cmd = ["zcat", input_filename]
        fallback = gzip.open
    else:
        return open(input_filename, 'r')

    try:
        return Popen(cmd, stdout=PIPE).stdout
    except OSError as e:
        # Popen raises OSError (typically FileNotFoundError) when the
        # executable is missing or cannot be run.
        if fallback is None:
            raise
        print(e)
        # Binary mode keeps the line type identical to the pipe's output.
        return fallback(input_filename, 'rb')