#!/usr/bin/env python
# coding: utf-8

from __future__ import (absolute_import, division, print_function)

from logging import getLogger, StreamHandler, DEBUG
# Module-level logger, named after this module.
logger = getLogger(__name__)
# StreamHandler() with no argument writes to sys.stderr.
handler = StreamHandler()
# Uncomment the next two lines to emit DEBUG-level records from this module.
# handler.setLevel(DEBUG)
# logger.setLevel(DEBUG)
logger.addHandler(handler)
# Do not hand records on to ancestor loggers, so that a handler configured
# on the root logger does not print the same message a second time.
logger.propagate = False


from pathlib import Path
from common.imagededuper import ImageDeduper

import sys


def is_image(path):
    """Return True when *path* has a recognised image file extension.

    Only the final suffix of the path is inspected; the file is never opened
    and does not need to exist. The comparison is case-insensitive, so
    mixed-case suffixes such as ``.Jpg`` or ``.PnG`` are accepted in addition
    to the all-lowercase and all-uppercase spellings.

    Args:
        path: a ``pathlib`` path object (anything exposing ``.suffix``).

    Returns:
        bool: True for ``.png``, ``.jpg``, ``.jpeg`` and ``.gif`` suffixes
        (in any letter case), False otherwise.
    """
    return path.suffix.lower() in {'.png', '.jpg', '.jpeg', '.gif'}


def gen_image_filenames(target_dir, recursive):
    """Collect the paths of the image files found under *target_dir*.

    Args:
        target_dir: directory to search; passed straight to ``Path``.
        recursive: when true, search every subdirectory as well
            (glob pattern ``**/*``); otherwise only the direct children
            of *target_dir* are considered (glob pattern ``*``).

    Returns:
        list of str: one entry per matching file, in the order produced
        by ``Path.glob``.

    Exits:
        When no image is found, an error is logged and the process is
        terminated through ``sys.exit(1)`` (``SystemExit``). A non-zero
        status is used so that calling scripts can detect the failure.
    """
    pattern = '**/*' if recursive else '*'
    image_filenames = [
        str(path)
        for path in Path(target_dir).glob(pattern)
        # glob also yields directories; one named like "scans.png" must not
        # be reported as an image file.
        if is_image(path) and path.is_file()
    ]
    if not image_filenames:
        logger.error("Image not found. To search the directory recursively, add the --recursive option.")
        sys.exit(1)
    return image_filenames


def dedupe_images(args):
    """Run the de-duplication workflow described by the parsed *args*.

    The image list is gathered from ``args.target_dir`` (recursively when
    ``args.recursive`` is set), handed to an ``ImageDeduper`` and deduped.
    Afterwards ``deduper.preserve`` is called when ``args.delete`` is set,
    otherwise ``deduper.print_duplicates``.

    Pressing Ctrl-C at any point of the workflow ends the process with
    exit status 1 instead of printing a traceback.

    Args:
        args: the namespace produced by the argument parser in ``main``.
    """
    root, walk_subdirs = args.target_dir, args.recursive
    try:
        filenames = gen_image_filenames(root, walk_subdirs)
        deduper = ImageDeduper(args, filenames)
        deduper.dedupe(args)

        # Pick the follow-up step first, then run it with the same args.
        follow_up = deduper.preserve if args.delete else deduper.print_duplicates
        follow_up(args)
    except KeyboardInterrupt:
        sys.exit(1)


def main(argv=None):
    """Parse the command line and start the image de-duplication.

    Args:
        argv: list of command-line argument strings, without the program
            name. When None (the default) argparse reads ``sys.argv[1:]``
            at call time, which is what running the script does.

    Raises:
        SystemExit: raised by argparse for ``--help`` (status 0) and for
            invalid or missing arguments (status 2).
    """
    import argparse
    parser = argparse.ArgumentParser(
        description="finding and deleting duplicate image files based on perceptual hash")

    # Positional arguments.
    parser.add_argument("target_dir", type=str)
    parser.add_argument("hash_method", type=str,
        choices=['ahash', 'phash', 'dhash', 'whash'],
        help="""method of perceptual hashing.
            ahash(Average hash) phash(Perceptual hash) dhash(Difference hash)
            whash(Haar wavelet hash)""")
    parser.add_argument("hamming_distance", type=int,
        help="allowable Hamming distances.")
    # Disabled option, kept for reference (not registered with the parser):
    # parser.add_argument("--preserve_largest", type=str,
    #     choice=['filesize', 'pixelsize'],
    #     help="preserve the largest filesize or pixelsize image from duplicate images")

    # General behaviour.
    parser.add_argument("-r", "--recursive", action="store_true", default=False,
        help="search images recursively from the target directory")
    parser.add_argument("-d", "--delete", action="store_true",
        help="prompt user for files to preserve and delete")
    parser.add_argument("-c", "--imgcat", action="store_true",
        help="display duplicate images for iTerm2")
    # NOTE(review): the help text advertises default=filesize, but no default
    # is set here, so args.sort is None unless the option is given --
    # presumably the fallback is applied downstream; confirm.
    parser.add_argument("--sort", type=str,
        choices=['filesize', 'filepath', 'imagesize', 'width', 'height'],
        help="how to sort duplicate image files (default=filesize)")
    parser.add_argument("--reverse", action="store_true",
        help="reverse order while sorting")
    parser.add_argument("--num-proc", type=int, default=None,
        help="number of hash calculation and ngt processes (default=cpu_count-1)")
    parser.add_argument("--log", action="store_true",
        help="output logs of duplicate and delete files")
    parser.add_argument("--cache", action="store_true",
        help="create and use image hash cache")

    # Display options.
    parser.add_argument("--size", type=str, default="256x256",
        help="resize image (default=256x256)")
    parser.add_argument("--space", type=int, default=0,
        help="space between images (default=0)")
    parser.add_argument("--space-color", type=str, default='black',
        help="space color between images (default=black)")
    parser.add_argument("--tile-num", type=int, default=4,
        help="horizontal tile number (default=4)")
    parser.add_argument("--interpolation", type=str, default="INTER_LINEAR",
        help="interpolation methods")
    # The next three flags are stored inverted: the attribute defaults to
    # True and giving the flag sets it to False.
    parser.add_argument("--no-keep-aspect", dest="keep_aspect", action="store_false",
        help="do not keep aspect when displaying imagest")
    parser.add_argument("--no-subdir-warning", dest="print_warning", action="store_false",
        help="stop warnings that appear when similar images are in different subdirectories")
    parser.add_argument("--dry-run", dest="run", action="store_false",
        help="dry run (do not delete any files)")

    # Nearest-neighbour search back ends and their tuning parameters.
    parser.add_argument("--faiss-flat", action="store_true",
        help="use faiss exact search (IndexFlatL2) for calculating Hamming distance between hash of images")
    parser.add_argument("--ngt", action="store_true",
        help="use NGT for calculating Hamming distance between hash of images")
    parser.add_argument("--ngt-k", type=int, default=20,
        help="""number of searched objects when using NGT.
            Increasing this value, improves accuracy and increases computation time.
            (default=20)""")
    parser.add_argument("--ngt-epsilon", type=float, default=0.1,
        help="""search range when using NGT.
            Increasing this value, improves accuracy and increases computation time.
            (default=0.1)""")
    parser.add_argument("--hnsw", action="store_true",
        help="use hnsw for calculating Hamming distance between hash of images")
    parser.add_argument("--hnsw-k", type=int, default=20,
        help="""number of searched objects when using hnsw.
            Increasing this value, improves accuracy and increases computation time.
            (default=20)""")
    parser.add_argument("--hnsw-ef-construction", type=int, default=100,
        help="controls index search speed/build speed tradeoff (default=100)")
    parser.add_argument("--hnsw-m", type=int, default=16,
        help="""m is tightly connected with internal dimensionality of the data
            stronlgy affects the memory consumption (default=16)""")
    parser.add_argument("--hnsw-ef", type=int, default=50,
        help="controls recall. higher ef leads to better accuracy, but slower search (default=50)")

    # Pass argv through so that callers supplying their own argument list are
    # honoured; None makes argparse fall back to sys.argv[1:].
    args = parser.parse_args(argv)
    dedupe_images(args)


# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
