#!/usr/bin/python3
'''gnomonicus is a script which links together the functions defined in gnomonicus.py as a CLI script to 
    produce variants, mutations and an antibiogram from a minos VCF, reference genome and a resistance
    catalogue
'''
import argparse
import logging
import os
import time

import gumpy
import piezo

import gnomonicus
from gnomonicus import (loadGenome, populateEffects, populateMutations,
                        populateVariants, saveJSON)

#The individual CSV types which can be requested through --csvs (`all` selects every one of them)
CSV_TYPES = ("variants", "mutations", "effects", "predictions")

#The values of --fasta which result in a FASTA file being written (compared case-insensitively)
FASTA_MODES = ("fixed", "variable", "both")


def parseMinorIndices(lines):
    '''Convert the lines of a minor populations file into a list of genome indices.

    Blank lines are skipped rather than being treated as invalid indices.

    Args:
        lines (iterable of str): Lines of the file, one genome index per line

    Returns:
        list of int: The indices in the order they appear

    Raises:
        ValueError: If a non-blank line cannot be converted to an int. The message
            contains the 1-based line number and the offending text.
    '''
    minor_indices = []
    for line_number, line in enumerate(lines, start=1):
        index = line.strip()
        if not index:
            continue
        #Convert line by line so we can produce a nice error if invalid
        try:
            minor_indices.append(int(index))
        except ValueError as e:
            raise ValueError(f"Invalid index on line {line_number}: {index}") from e
    return minor_indices


def selectCSVs(csvs):
    '''Work out which CSV files were requested on the command line.

    Args:
        csvs (list of str or None): Values given to --csvs, or None if the flag was not used

    Returns:
        tuple of bool: (variants, mutations, effects, predictions) flags. All are False
            when `csvs` is None and all are True when `csvs` contains `all`.
    '''
    if csvs is None:
        return (False,) * len(CSV_TYPES)
    if "all" in csvs:
        return (True,) * len(CSV_TYPES)
    return tuple(kind in csvs for kind in CSV_TYPES)


if __name__ == "__main__":
    start = time.time()
    #Argparser setup
    parser = argparse.ArgumentParser()
    parser.add_argument('-v', '--version', action='version', version=f'gnomonicus {gnomonicus.__version__}')
    parser.add_argument("--vcf_file",required=True,help="the path to a single VCF file")
    parser.add_argument("--genome_object",required=True,help="the path to a compressed gumpy Genome object or a genbank file")
    parser.add_argument("--catalogue_file",default=None,required=False,help="the path to the resistance catalogue")
    parser.add_argument("--ignore_vcf_filter",action='store_true',default=False,help="whether to ignore the FILTER field in the vcf (e.g. necessary for some versions of Clockwork VCFs)")
    parser.add_argument("--progress",action='store_true',default=False,help="whether to show progress using tqdm")
    parser.add_argument("--output_dir", required=False, default=".", help="Directory to save output files to. Defaults to wherever the script is run from.")
    parser.add_argument("--json", required=False, action='store_true', default=False, help="Flag to create a single JSON output as well as the CSVs")
    parser.add_argument("--fasta", required=False, default=None, help="Use to output a FASTA file of the resultant genome. Specify either 'fixed' or 'variable' for fixed length and variable length FASTA respectively. Alternatively, use 'both' to produce both")
    parser.add_argument("--minor_populations", required=False, default=None, help="Path to a line separated file containing genome indices of minor populations.")
    parser.add_argument("--csvs", required=False, nargs="+", help="Types of CSV to produce. Accepted values are [variants, mutations, effects, predictions, all]. `all` produces all of the CSVs")
    parser.add_argument("--debug", default=False, action='store_true', help='Whether to log debugging messages to the log. Defaults to False')
    parser.add_argument("--resistance_genes", required=False, default=False, action="store_true", help="Flag to filter mutations and variants to only include genes present in the resistance catalogue")
    options = parser.parse_args()

    options.output_dir = os.path.realpath(options.output_dir)

    #Make the output directory if it doesn't already exist
    os.makedirs(options.output_dir, exist_ok=True)

    #Get the stem of the VCF filename for use as a unique ID
    vcfStem = os.path.split(options.vcf_file)[-1].replace(".vcf", "")

    #Logging setup
    if options.debug:
        LOG_LEVEL = logging.DEBUG
    else:
        LOG_LEVEL = logging.WARNING
    logging.basicConfig(filename=os.path.join(options.output_dir, f'{vcfStem}.gnomonicus.log'), filemode='w', format='%(asctime)s -  %(levelname)s - %(message)s', datefmt='%d-%b-%y %H:%M:%S', level=LOG_LEVEL)
    logging.info(f"gnomonicus starting with output directory {options.output_dir}")

    #Figure out which CSVs should be produced
    make_variants_csv, make_mutations_csv, make_effects_csv, make_prediction_csv = selectCSVs(options.csvs)

    #Values which match no CSV type would otherwise be dropped without any feedback
    unknown_csvs = [kind for kind in (options.csvs or []) if kind != "all" and kind not in CSV_TYPES]
    if unknown_csvs:
        print(f"[WARNING]: Ignoring unrecognised --csvs value(s): {', '.join(unknown_csvs)}")
        logging.warning(f"Ignoring unrecognised --csvs value(s): {', '.join(unknown_csvs)}")

    #Normalise the FASTA mode once. An unrecognised value produces no FASTA, so say so
    fasta_mode = options.fasta.lower() if options.fasta else None
    if fasta_mode is not None and fasta_mode not in FASTA_MODES:
        print(f"[WARNING]: Unrecognised --fasta value '{options.fasta}'. No FASTA will be written")
        logging.warning(f"Unrecognised --fasta value '{options.fasta}'. No FASTA will be written")
        fasta_mode = None

    make_any_csv = make_variants_csv or make_mutations_csv or make_effects_csv or make_prediction_csv
    if not make_any_csv and not options.json and fasta_mode is None:
        #No files will be created so warn the user
        print("[WARNING]: No outputs selected. No files will be created. For help, try `gnomonicus --help`")
        logging.warning("No outputs selected. No files will be created")


    #Get reference genome
    reference = loadGenome(options.genome_object, options.progress)
    logging.debug("Loaded reference genome")

    #Check if we have minor population indices
    if options.minor_populations is not None:
        #Load from line separated file
        with open(options.minor_populations) as f:
            minor_indices = parseMinorIndices(f)
    else:
        minor_indices = []
    #Build the mutated genome using gumpy
    vcf = gumpy.VCFFile(
                        options.vcf_file, 
                        ignore_filter=options.ignore_vcf_filter, 
                        minor_population_indices=minor_indices
    )
    sample = reference + vcf
    logging.debug("Applied the VCF to the reference")

    #Get resistance catalogue
    if options.catalogue_file:
        resistanceCatalogue = piezo.ResistanceCatalogue(options.catalogue_file, prediction_subset_only=True)
        logging.debug("Loaded resistance catalogue")
    else:
        resistanceCatalogue = None
        logging.info("No resistance catalogue provided, producing variants and mutations only")

    #Get the GenomeDifference for extracting genome level mutations
    diff = reference - sample
    logging.debug("Got the genome difference")

    #Complain if there are no variants
    if diff.variants is None:
        logging.error("No variants detected!")
        raise Exception("No variants detected!")

    #Get the variations and mutations
    variants = populateVariants(vcfStem, options.output_dir, diff, make_variants_csv, options.resistance_genes, catalogue=resistanceCatalogue)
    logging.debug("Populated and saved variants.csv")

    mutations, referenceGenes, minor_errors = populateMutations(vcfStem, options.output_dir, diff, 
                        reference, sample, resistanceCatalogue, make_mutations_csv, options.resistance_genes)
    if mutations is None:
        logging.info("No mutations found - probably due to exclusively inter-gene variation or no variation.\n\t\t\t\t\t\t\t No effects.csv written")
    else:
        logging.debug("Populated and saved mutatons.csv")

    #Get the effects of the mutations
    if resistanceCatalogue is not None and mutations is not None:
        effects, metadata = populateEffects(options.output_dir, resistanceCatalogue, mutations, referenceGenes, vcfStem, make_effects_csv, make_prediction_csv)
        logging.debug("Populated and saved effects.csv")
    else:
        metadata = {}
        effects = None
        logging.info("Skipped effects.csv due to lack of resistance catalogue or mutations")

    #Add data to the log
    logging.info("********** Successfully completed **********")
    
    logging.info(f"VCF file: {options.vcf_file}")
    logging.info(f"Reference genome file: {options.genome_object}")

    if resistanceCatalogue:
        logging.info(f"Catalogue reference genome: {resistanceCatalogue.catalogue.genbank_reference}")
        logging.info(f"Catalogue name: {resistanceCatalogue.catalogue.name}")
        logging.info(f"Catalogue version: {resistanceCatalogue.catalogue.version}")
        logging.info(f"Catalogue grammar: {resistanceCatalogue.catalogue.grammar}")
        logging.info(f"Catalogue values: {resistanceCatalogue.catalogue.values}")
        logging.info(f"Catalogue path: {options.catalogue_file}")
    for drug in sorted(metadata):
        logging.info(f"{drug} {metadata[drug]}")
    logging.info(f"Completed in {time.time()-start}s")

    if options.json:
        logging.info(f"Saving a JSON... See {options.output_dir}/gnomonicus-out.json")
        saveJSON(variants, mutations, effects, options.output_dir, vcfStem, resistanceCatalogue, gnomonicus.__version__, time.time()-start, reference, options.vcf_file, options.genome_object, options.catalogue_file, minor_errors)

    if fasta_mode in ('fixed', 'variable'):
        #Write the resultant fasta file. The filename keeps the value exactly as given on the command line
        sample.save_fasta(os.path.join(options.output_dir, vcfStem+"-"+options.fasta+".fasta"), fixed_length=(fasta_mode == 'fixed'))
    elif fasta_mode == 'both':
        sample.save_fasta(os.path.join(options.output_dir, vcfStem+"-fixed.fasta"), fixed_length=True)
        sample.save_fasta(os.path.join(options.output_dir, vcfStem+"-variable.fasta"), fixed_length=False)
