#!/usr/bin/env python
import re
import sys
import tarfile
import difflib
from os.path import basename, isfile

import ilan_dev
from ilan_dev import lcp, human_bytes, get_dirs


# Matches archive member names of compiled Python C extension modules, i.e.
# paths of the form
#     <anything>lib/pythonX.Y/(site-packages|lib-dynload)/<mod>.cp<tag>.so
# (for example a suffix such as ``.cpython-311-x86_64-linux-gnu.so``).
# The named group ``mod`` captures the module path between the
# site-packages/lib-dynload directory and the ``.cp...so`` suffix; it may
# still contain '/' separators for modules inside packages.
# re.VERBOSE is used, so whitespace and the inline '#' comment inside the
# pattern are ignored by the regex engine.
cext_pat = re.compile(r'''
.*lib/
python\d\.\d+/
(site-packages|lib-dynload)/
(?P<mod>\S+?)                   # actual module name
\.cp[\w\-]+\.so
$''', re.VERBOSE)

def tar_cext(path):
    """Print an ``import`` statement for every C extension in a tarball.

    Each member name of the tar archive *path* is matched against
    ``cext_pat``.  For every match, the captured module path has its '/'
    separators turned into '.' to form a dotted module name.  The resulting
    names are printed to stdout in sorted order, one ``import <name>`` line
    per matching member.
    """
    with tarfile.open(path) as archive:
        hits = (cext_pat.match(member.name)
                for member in archive.getmembers())
        modules = [hit.group('mod').replace('/', '.')
                   for hit in hits if hit is not None]

    # Printing happens after the archive is closed, in sorted order.
    modules.sort()
    for module in modules:
        print('import', module)


def tar_nameset(path):
    """Return the set of member names contained in the tar archive *path*."""
    with tarfile.open(path) as archive:
        names = archive.getnames()
    # Duplicate member names in the archive collapse into a single entry.
    return set(names)


def tar_common(paths):
    """Return the set of archive names present in every given tarball.

    paths -- iterable of tar archive paths (any iterable, not only a list).

    Returns a set of member names.  When *paths* is empty there is nothing
    to intersect and an empty set is returned instead of raising
    IndexError.
    """
    # Materialize once so generators and other one-shot iterables work.
    paths = list(paths)
    if not paths:
        return set()

    common = tar_nameset(paths[0])
    for path in paths[1:]:
        # Every archive is still opened, even once `common` is empty, so a
        # bad path later in the list is reported rather than skipped.
        common &= tar_nameset(path)
    return common


def tar_diff(path_a, path_b):
    """Write a unified diff of the member names of two tarballs to stdout.

    Only the archive names are compared, not member contents.  The names of
    each archive are sorted before diffing; *path_a* and *path_b* are used
    as the "from" and "to" file labels in the diff header.
    """
    def name_lines(path):
        # The newline is appended before sorting, so it takes part in the
        # ordering exactly as the lines handed to difflib.
        lines = [name + '\n' for name in tar_nameset(path)]
        lines.sort()
        return lines

    delta = difflib.unified_diff(name_lines(path_a), name_lines(path_b),
                                 fromfile=path_a, tofile=path_b)
    for line in delta:
        sys.stdout.write(line)


def show_stat(path):
    """Print a summary of the tar archive *path* to stdout.

    The summary lists the number of regular files, symbolic links, hard
    links and directories, the counts reported by ``get_dirs`` for its
    'empty' and 'missing' modes, the total number of members, the sum of all
    member sizes (formatted by ``human_bytes``) and the result of ``lcp``
    over all member names.
    """
    with tarfile.open(path) as archive:
        members = archive.getmembers()

    # (name, is-directory) pairs, the input shape passed to get_dirs().
    entries = [(member.name, member.isdir()) for member in members]

    file_count = sum(1 for member in members if member.isfile())
    symlink_count = sum(1 for member in members if member.issym())
    hardlink_count = sum(1 for member in members if member.islnk())
    dir_count = sum(1 for member in members if member.isdir())
    total_size = sum(member.size for member in members)

    print("        regular files: %7d" % file_count)
    print("       symbolic links: %7d" % symlink_count)
    print("           hard links: %7d" % hardlink_count)
    print("          directories: %7d" % dir_count)
    print("           empty dirs: %7d" % len(get_dirs(entries, 'empty')))
    print("         missing dirs: %7d" % len(get_dirs(entries, 'missing')))
    print("       total archives: %7d" % len(entries))
    print("           total size: %7s" % human_bytes(total_size))
    print("longest common prefix: %r" % lcp([name for name, _ in entries]))


def help():
    """Print the usage message, command list and options to stdout.

    The program name shown on the usage line is the basename of
    ``sys.argv[0]``.
    """
    print("""\
usage: %s [options] COMMAND [TAR ...]

display useful information about tar files

Commands:
  cext        list Python C extension imports
  common      list archive names common to all tarballs
  diff        show difference of two tarballs (compares archive names only)
  ed          list empty directories
  lcp         show the longest common prefix of all archive names
  ls          list archive members, similar to 'ls -l'
  stat        display a useful summary of information
  size        list file sizes of all archives (sorted by size)

Options:
  -h, --help  show this help message and exit
  --version   show the ilan-dev version and exit
""" % basename(sys.argv[0]))


# All commands accepted as the first command-line argument.
CMDS = ['cext', 'common', 'diff', 'ed', 'lcp', 'ls', 'stat', 'size']


def check_commamd(cmd):
    """Report whether *cmd* is an unknown command.

    Returns False when *cmd* is one of ``CMDS``.  Otherwise a diagnostic is
    written to stderr -- including the closest known command if difflib
    finds one, or the full command list if not -- and True is returned so
    the caller can exit with an error status.
    """
    if cmd in CMDS:
        return False

    # Diagnostics go to stderr, like the sys.exit(message) errors used
    # elsewhere in this script, so they do not pollute piped output.
    print("no such command: %r" % cmd, file=sys.stderr)
    close_matches = difflib.get_close_matches(cmd, CMDS)
    if close_matches:
        print("did you mean %r?" % close_matches[0], file=sys.stderr)
    else:
        print("commands are: %r" % CMDS, file=sys.stderr)
    return True


def _require_file(path):
    """Exit with a "no such file" message unless *path* is an existing file."""
    if not isfile(path):
        sys.exit("no such file: %r" % path)


def main():
    """Command-line entry point: parse ``sys.argv`` and run one command.

    With no arguments, or with -h/--help anywhere on the command line, the
    help text is printed.  With --version the ilan-dev version is printed.
    Otherwise the first argument is the command and the remaining arguments
    are tar archive paths:

    * 'common' and 'diff' operate on all given archives at once;
    * every other command is run once per archive, with a ``==> path <==``
      header before each one when more than one archive is given.

    Exits with status 1 for an unknown command, and with an error message
    for a wrong number of archives or a path that is not an existing file.
    """
    args = sys.argv[1:]
    if not args or '-h' in args or '--help' in args:
        help()
        return

    if "--version" in args:
        print("ilan-dev: %s" % ilan_dev.__version__)
        return

    cmd, args = args[0], args[1:]
    if check_commamd(cmd):
        sys.exit(1)

    if cmd == 'common':
        if not args:
            sys.exit("'common' requires at least one tar archive, try --help")
        # Validate up front so a missing file gives the same clean message
        # as the per-archive commands instead of a traceback.
        for path in args:
            _require_file(path)
        for x in sorted(tar_common(args)):
            print(x)
        return

    if cmd == 'diff':
        if len(args) != 2:
            sys.exit("'diff' requires exactly two tar archives, try --help")
        for path in args:
            _require_file(path)
        tar_diff(*args)
        return

    for path in args:
        # Checked per archive, so output for earlier archives is still
        # produced before a later missing file aborts the run.
        _require_file(path)

        if len(args) > 1:
            print("==> %s <==" % path)

        if cmd == 'cext':
            tar_cext(path)

        elif cmd == 'ed':
            with tarfile.open(path) as t:
                lst = [(m.name, m.isdir()) for m in t.getmembers()]
            for name in sorted(get_dirs(lst, 'empty')):
                print(name)

        elif cmd == 'lcp':
            with tarfile.open(path) as t:
                s = lcp(m.name for m in t.getmembers())
            # Print nothing when the names share no common prefix.
            if s:
                print(s)

        elif cmd == 'ls':
            with tarfile.open(path) as t:
                t.list()

        elif cmd == 'size':
            with tarfile.open(path) as t:
                lst = [(m.size, m.name) for m in t.getmembers()]
            # Tuples sort by size first, then by name for equal sizes.
            for size, name in sorted(lst):
                print("%-60s %10s" % (name, human_bytes(size)))

        elif cmd == 'stat':
            show_stat(path)


# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
