#!/usr/bin/env python
import argparse
import logging
from vcf_dedup.runner import VcfDedupRunner


def _build_parser():
    """Build the argparse parser describing the vcf-dedup command line."""
    parser = argparse.ArgumentParser(
        description='VCF variant duplication removal - vcf-dedup v0.4.4')
    parser.add_argument(
        '--input-vcf', metavar='input_vcf', required=True,
        help='The input VCF compressed or not [required]')
    parser.add_argument(
        '--output-vcf', metavar='output_vcf', default=None,
        help='The output VCF. If not provided it will write to standard output')
    parser.add_argument(
        '--variant-caller', metavar='variant_caller', required=True,
        help='The variant caller used to generate the input VCF. One of "strelka", '
             '"starling", "platypus", "generic", "duplication_finder" [required]')
    parser.add_argument(
        '--selection-method', metavar='selection_method', required=True,
        help='The selection method for ties in filtering status between duplicated '
             'variants. One of "af", "quality", "allele_calls", "arbitrary" [required]')
    parser.add_argument(
        '--equality-mode', metavar='equality_mode', default="1",
        help='The mode to find equal variants. "1": chromosome, position, reference '
             'and alternate must be equal; "2": chromosome, position and reference '
             'must be equal (default: 1)')
    # The default is 0 (not None): the original code declared default=None but
    # then overrode it with parser.set_defaults(sample_idx=0), so 0 was always
    # the effective value.
    # NOTE(review): a value given on the command line arrives as a str while
    # the default is the int 0 -- confirm the runner handles both.
    parser.add_argument(
        '--sample-idx', metavar='sample_idx', default=0,
        help='For multisample VCFs this 0-based index indicates the sample to be '
             'used to merge duplicated variants. E.g. for duplication removal based '
             'on highest AF, the highest AF will be calculated on the sample '
             'indicated by this index.')
    parser.add_argument(
        '--sample-name', metavar='sample_name', default=None,
        help='For multisample VCFs the sample name indicates the sample to be used '
             'to merge duplicated variants. E.g. for duplication removal based on '
             'highest AF, the highest AF will be calculated on the indicated sample. '
             'NOTE: sample-name overrides sample-idx')
    parser.add_argument(
        '--sort-threads', metavar='sort_threads', default="1",
        help='The number of threads that UNIX sort will use.')
    parser.add_argument(
        '--sort-tmp-folder', metavar='sort_tmp_folder', default="",
        help='The temporary folder that UNIX sort will use (default: output folder).')
    parser.add_argument(
        '--sort', action='store_true',
        help='Sorts the VCF before dedupping. Beware that VCF needs to be sorted '
             'considering chromosome, position, reference and alternate bases.')
    parser.add_argument(
        '--sort-mem-percentage', metavar='sort_mem_percentage', default="80",
        help='The percentage of memory that sort will use. Values in the range '
             '[1, 100]. Default: 80')
    parser.add_argument(
        '--log-file', metavar='log_file', default="",
        help='The file to which logs will be appended (default: stderr).')
    parser.add_argument('--verbose', action='store_true')
    return parser


def main(argv=None):
    """Parse the command line, build the config dict and run the VCF dedupper.

    :param argv: optional list of argument strings; when None (the default)
        argparse reads the arguments from sys.argv[1:].
    """
    args = _build_parser().parse_args(argv)

    # Creates a data structure with all config parameters
    config = {
        # Sets parameters from CLI
        "input_vcf": args.input_vcf,
        "output_vcf": args.output_vcf,
        "variant_caller": args.variant_caller,
        "selection_method": args.selection_method,
        "equality_mode": args.equality_mode,
        "sample_idx": args.sample_idx,
        "sample_name": args.sample_name,
        "sort_vcf": args.sort,
        "sort_threads": args.sort_threads,
        "temp_folder": args.sort_tmp_folder,
        "sort_mem_percentage": args.sort_mem_percentage,
        # Same numeric values as before: logging.DEBUG == 10, logging.ERROR == 40
        "log_level": logging.DEBUG if args.verbose else logging.ERROR,
    }
    # Optional keys are only added when the user asked for them
    if args.log_file:
        config["log_file"] = args.log_file
    if args.verbose:
        config["verbose"] = True

    # Calls the VCF dedupper
    runner = VcfDedupRunner(config)
    runner.run()

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
