#!/usr/bin/env python3

import os
import sys

# Put the directory containing this script at the front of the module search
# path.
_APP_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__)))
sys.path.insert(0, _APP_PATH)

# Also put the directory three levels above this script at the front of the
# module search path. Presumably that is the project root holding the
# `pathhistogram` package when running from a source checkout -- TODO confirm.
_APP_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..'))
sys.path.insert(0, _APP_PATH)

import argparse

import pathhistogram.histogram

# Number of histogram slices used when -n/--slices is not given.
_DEFAULT_HISTOGRAM_N = 30

def _get_args(argv=None):
    """Parse the command-line arguments for the histogram tool.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default), argparse reads the arguments of the running process.

    Returns:
        The parsed `argparse.Namespace`, with the attributes `path`,
        `filters`, `slices`, `minimum_size`, `maximum_size` and
        `dump_filepath`.
    """
    p = argparse.ArgumentParser(
        description='Generate a histogram of filesizes')

    p.add_argument(
        'path',
        help="Path")

    # Repeatable option: every occurrence is appended to `filters`.
    p.add_argument(
        '-f', '--filter',
        action='append',
        default=[],
        dest='filters',
        help="One filespec to filter for. May be provided zero or more times")

    p.add_argument(
        '-n', '--slices',
        type=int,
        default=_DEFAULT_HISTOGRAM_N,
        help="Number of slices in the histogram. Defaults to {}.".format(_DEFAULT_HISTOGRAM_N))

    p.add_argument(
        '--minimum-size',
        type=int,
        help="Impose a limit on the smallest file that we're allowed to look at")

    p.add_argument(
        '--maximum-size',
        type=int,
        help="Impose a limit on the largest file that we're allowed to look at")

    p.add_argument(
        '--dump-filepath',
        help="Write the binned files out to a file (or '-' for STDOUT)")

    return p.parse_args(argv)

def _main():
    """Run the command-line tool.

    Parses the arguments, asks the `Histogram` object to read the files under
    the given path, and prints "No files found." and stops if nothing came
    back. Otherwise the files are binned, the histogram is printed, and, if
    `--dump-filepath` was given, the bins are dumped to STDOUT (for '-') or to
    the named file.
    """
    options = _get_args()
    histogram = pathhistogram.histogram.Histogram()

    found, biggest = histogram.read_files(
        options.path,
        filters=options.filters,
        minimum_size=options.minimum_size,
        maximum_size=options.maximum_size)

    if not found:
        print("No files found.")
        return

    bins, bin_width = histogram.load_histogram(found, biggest, options.slices)
    histogram.print_histogram(bins, bin_width)

    destination = options.dump_filepath

    # No dump requested.
    if destination is None:
        return

    # Anything other than '-' is treated as a file to (over)write.
    if destination != '-':
        with open(destination, 'w') as out:
            _dump_bins(bins, biggest, bin_width, out)

        return

    # '-' means STDOUT: frame the dump with a heading and a trailing blank line.
    print()
    print('Binned Files')
    print('============')
    print()

    _dump_bins(bins, biggest, bin_width, sys.stdout)

    print('')

def _dump_bins(binned_files, largest_size, slice_size, f):
    """Write a plain-text dump of the binned files to the stream `f`.

    The output starts with four "name value" header lines (`n`,
    `total_count`, `largest_size`, `slice_size`) and a blank line. Each
    non-empty bin then follows as a "# <index>" line, one line per entry in
    the bin, and a blank line.

    Args:
        binned_files: Mapping indexed by the integers 0..len-1 whose values
            are the collections of file entries in each bin.
        largest_size: Value written verbatim on the `largest_size` line.
        slice_size: Value written verbatim on the `slice_size` line.
        f: Writable text stream.
    """
    file_total = sum(len(members) for members in binned_files.values())
    bin_total = len(binned_files)

    header = (
        ('n', bin_total),
        ('total_count', file_total),
        ('largest_size', largest_size),
        ('slice_size', slice_size),
    )

    for label, value in header:
        f.write("{} {}\n".format(label, value))

    f.write('\n')

    for bin_index in range(bin_total):
        members = binned_files[bin_index]

        # Empty bins are left out; a reader can infer them from `n`.
        if members:
            f.write("# {}\n".format(bin_index))

            for entry in members:
                f.write("{}\n".format(entry))

            f.write('\n')

# Run the tool only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    _main()
