#!/usr/bin/env python3

import os
import hashlib
import sys
#from ruamel.yaml import YAML
#from ruamel.yaml.comments import CommentedMap
import pathlib
import json
from collections import OrderedDict


#yaml = YAML()

# Size threshold in bytes: file_info() only computes an MD5 for files whose
# size does not exceed this value; larger files get a dash placeholder.
MAX_FILE_SIZE_HASH = 1 << 20  # 1 MB (1024 * 1024 bytes)

# Recursion depth comes from the optional first command-line argument;
# without one, only the top-level directory is listed (depth 0).
depth = int(sys.argv[1]) if len(sys.argv) > 1 else 0


def md5(fname):
    """Return the hexadecimal MD5 digest of the file at ``fname``.

    The file is opened in binary mode and fed to the hasher in 4096-byte
    pieces, so the whole content is never held in memory at once.
    See: https://stackoverflow.com/a/3431838
    """
    digest = hashlib.md5()
    with open(fname, "rb") as stream:
        while True:
            piece = stream.read(4096)
            if piece == b"":
                # An empty read marks end of file.
                break
            digest.update(piece)
    return digest.hexdigest()


def file_info(path, depth):
    """Describe a single filesystem entry.

    ``path`` is a pathlib path; ``depth`` is the number of directory
    levels that may still be expanded.

    Returns one of:
    * for a directory with ``depth`` >= 1: the mapping produced by
      ``collect_dir_info(path, depth - 1)``;
    * for a directory with ``depth`` < 1: ``"<size>, <mtime>, DIR"``;
    * for anything else: ``"<size>, <mtime>, <hash>"`` where the mtime
      text is cut to 16 characters and the hash is the file's MD5, or
      32 dashes when the file is larger than ``MAX_FILE_SIZE_HASH``;
    * ``"deleted"`` if a ``FileNotFoundError`` is raised at any point
      while gathering the above (including inside the recursion).
    """
    try:
        location = str(path.absolute())
        stats = os.stat(location)
        size_text = str(stats.st_size)
        mtime_text = str(stats.st_mtime)

        if path.is_dir():
            if depth >= 1:
                # Expand the directory, spending one level of depth.
                return collect_dir_info(path, depth - 1)
            return ', '.join((size_text, mtime_text, 'DIR'))

        if stats.st_size > MAX_FILE_SIZE_HASH:
            # Too big to hash: use a placeholder as wide as an MD5 digest.
            digest = "-" * 32
        else:
            digest = md5(location)
        return ', '.join((size_text, mtime_text[:16], digest))
    except FileNotFoundError:
        return "deleted"


def collect_dir_info(path, depth):
    """Build an ordered mapping of entry name -> ``file_info`` result.

    The direct children of ``path`` are visited in reverse-sorted order;
    any child named ``.git`` is left out. ``depth`` is handed unchanged
    to ``file_info`` for every child.
    """
    children = list(path.iterdir())
    children.sort(reverse=True)

    listing = OrderedDict()
    for child in children:
        if child.name == '.git':
            continue
        listing[child.name or '.'] = file_info(child, depth)
    return listing


# Describe the current working directory down to the requested depth and
# write the result to standard output as JSON.
json.dump(
    obj=collect_dir_info(pathlib.Path('.'), depth),
    fp=sys.stdout,
)
