#!/usr/bin/env python
"""
Filter a metapeptide database.
"""

import argparse
import logging
from datetime import datetime
from Bio import bgzf
from sixgill import metapeptides

# Module metadata.
__author__ = "Damon May"
__copyright__ = "Copyright (c) 2016 Damon May"
__license__ = "Apache 2.0"
__version__ = "0.1"

# Module-level logger; main() switches it to DEBUG when --debug is given.
logger = logging.getLogger(__name__)

# NOTE(review): not referenced anywhere in the visible part of this file --
# presumably a leftover default for a q-value cutoff; confirm before removing.
DEFAULT_MAXQ = 0.01


def declare_gather_args():
    """
    Build the command-line parser, parse the arguments and return them.
    No validation is done beyond what argparse itself performs. Note that
    the two file arguments use argparse.FileType, so argparse opens the
    input for reading and the output for writing at parse time.
    return: the argparse.Namespace produced by parse_args()
    """
    arg_parser = argparse.ArgumentParser(description=__doc__)

    # input and output databases
    arg_parser.add_argument('metapeptidefile', type=argparse.FileType('r'),
                            help='input metapeptide database file')
    arg_parser.add_argument('--out', required=True,
                            type=argparse.FileType('w'),
                            help='output metapeptide database file')

    # integer thresholds: all share type=int and a default of 0
    int_thresholds = (
        ('--minorflength', 'minimum ORF length'),
        ('--minaaseqlength', 'minimum AA sequence length'),
        ('--minreadcount', 'minimum read count'),
        ('--minqualscore', 'minimum basecall quality'),
        ('--minlongesttryppeplen',
         'minimum length of the longest tryptic peptide'),
    )
    for flag, help_text in int_thresholds:
        arg_parser.add_argument(flag, type=int, default=0, help=help_text)

    # the "missing" sentinel doubles as the default, i.e. no score filter
    missing_score = metapeptides.METAGENE_SCORE_MISSING
    arg_parser.add_argument(
        '--minmetagenescore', type=float, default=missing_score,
        help='Minimum MetaGene score (%d for none)' % missing_score)
    arg_parser.add_argument('--maxmetapeptides', type=int, default=None,
                            help='maximum number of metapeptides to write')

    arg_parser.add_argument('--debug', action="store_true",
                            help='Enable debug logging')
    return arg_parser.parse_args()


def main():
    """
    Script entry point: parse the command line, read the input metapeptide
    database, apply the requested filters and write the surviving
    metapeptides to the output database (both files are accessed through
    Bio.bgzf).

    All file handles -- the BGZF reader and writer as well as the two
    handles argparse opened while parsing -- are closed even if filtering
    or writing raises.
    """
    args = declare_gather_args()
    # logging
    logging.basicConfig(level=logging.INFO,
                        format="%(asctime)s %(levelname)s: %(message)s")
    if args.debug:
        logger.setLevel(logging.DEBUG)
        # any module-specific debugging goes below
        metapeptides.logger.setLevel(logging.DEBUG)

    script_start_time = datetime.now()
    logger.debug("Start time: %s", script_start_time)

    # Only the *names* of the argparse-opened files are used; the data goes
    # through the BGZF reader/writer opened on those same paths.
    out_bgzfwriter = None
    db_file = None
    try:
        out_bgzfwriter = bgzf.BgzfWriter(args.out.name, "w")

        print("Processing file %s" % args.metapeptidefile.name)

        db_file = bgzf.BgzfReader(args.metapeptidefile.name)
        filtered_metapeptides = metapeptides.filter_metapeptides(
            metapeptides.read_metapeptides(db_file),
            args.minorflength, args.minaaseqlength,
            args.minreadcount, args.minqualscore, args.minlongesttryppeplen,
            args.minmetagenescore,
            max_metapeptides=args.maxmetapeptides)
        # the reader must stay open until writing has consumed the input
        n_rows_written = metapeptides.write_metapeptides(
            filtered_metapeptides, out_bgzfwriter)
    finally:
        if db_file is not None:
            db_file.close()
        if out_bgzfwriter is not None:
            out_bgzfwriter.close()
        args.metapeptidefile.close()
        args.out.close()
    print("Wrote metapeptide database %s" % args.out.name)
    print("Done. Wrote %d rows" % n_rows_written)


# Run only when executed as a script, so importing this module does not
# parse sys.argv or open/truncate any files.
if __name__ == "__main__":
    main()
