#!/usr/bin/env python

##### ##### ##### ##### ##### ##### #####
#                                       #
#                 graftM                #
#                                       #
#  A pipeline for gene centric analyses #
#          of metagenome datasets       #
#                                       #
##### ##### ##### ##### ##### ##### #####

__author__ = "Joel Boyd, Ben Woodcroft"
__copyright__ = "Copyright 2014"
__credits__ = ["Joel Boyd", "Ben Woodcroft"]
__license__ = "GPL3"
__maintainer__ = "Joel Boyd, Ben Woodcroft"
__email__ = "joel.boyd near uq.net.au, b.woodcroft near uq.edu.au"
__status__ = "Development"

import argparse
import sys
import os

# Prefer a graftm package that is already importable. When it is not, add the
# directory one level above this script to the module search path so the
# imports below can be resolved from there instead.
try:
    from graftm.run import Run
except ImportError:
    script_dir = os.path.dirname(os.path.realpath(__file__))
    sys.path.append(os.path.join(script_dir, '..'))
import graftm
from graftm.run import Run

def phelp():
    """Print the top-level GraftM usage banner to standard output.

    The banner interpolates ``graftm.__version__`` into its title line and
    lists the main sub-commands together with example invocations.
    """
    # A parenthesised, single-argument print behaves identically as the
    # Python 2 print statement and as the Python 3 print function.
    print("""
                                       GraftM  %s

             A suite of tools for the rapid analysis of large sequence datasets.

                                Joel Boyd, Ben Woodcroft

=====================================================================================================
COMMUNITY PROFILING

    graft       -       Search for and phylogenetically classify reads associated with a single
                        marker gene, and construct a community profile
                        e.g. usage:
                            $ graftM graft --forward <READS> --graftm_package <GRAFTM_PACKAGE>

=====================================================================================================
UTILITIES


    manage      -       Sort and access the reads from your GraftM run
                        e.g. usage:
                            $ graftM manage --profile <GRAFTM_PROFILE> --seq <LINEAGE>

    create      -       Create a graftM package from aligned sequences or a hmm, and a
                        taxonomy file.

                        e.g. usage
                            > With aligned sequences:
                            $ graftm create --sequences <SEQUENCES> --alignment <ALIGNED_SEQUENCES>
                              --taxonomy <GREENGENES_FORMAT_TAXONOMY>

                            > With a HMM:
                            $ graftm create --sequences <SEQUENCES> --hmm <HMM>
                              --taxonomy <GREENGENES_FORMAT_TAXONOMY>

=====================================================================================================
""" % (graftm.__version__,))

if __name__ == '__main__':
    # Command-line entry point: build the argument parser with one sub-parser
    # per GraftM sub-command, then either print the custom help banner or hand
    # the parsed arguments to Run.
    #
    # add_help=False: top-level -h/--help is answered by phelp() below rather
    # than by argparse's auto-generated help.
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument('--version', action='version', version='graftM v%s' % graftm.__version__)

    # The chosen sub-command name is stored in args.subparser_name.
    subparsers = parser.add_subparsers(help="--", dest='subparser_name')

    # Standard GraftM pipeline.
    graft_parser = subparsers.add_parser('graft',
                                        description='Search and classify marker genes to construct community profiles',
                                        epilog='Joel Boyd, Ben Woodcroft')
    graft_parser.add_argument('--forward', metavar='forward read (or single read file)', help='comma separated list of forward reads .fa, or .fq.gz format.', required=True)
    # Options defaulting to argparse.SUPPRESS are absent from the namespace
    # entirely unless the user supplies them.
    graft_parser.add_argument('--reverse', metavar='reverse read', help='[do NOT use unless you understand the difficulties with this] Optional reverse raw sequence file in .fa, or .fq.gz format', default=argparse.SUPPRESS)
    graft_parser.add_argument('--eval', metavar='evalue', help='evalue cutoff for the hmmsearch (default = 1e-5)', default= '1e-5')
    graft_parser.add_argument('--threads', metavar='threads', help='number of threads to use', default='5')
    # NOTE(review): no type= is given, so a user-supplied cutoff arrives as a
    # string while the default is a float -- confirm how Run consumes it.
    graft_parser.add_argument('--placements_cutoff', metavar='confidence', help='Cutoff of placement confidence level (0.5 - 1), default = 0.75', default=0.75)
    graft_parser.add_argument('--graftm_package', metavar='reference_package', help='Reference package of gene family', default=argparse.SUPPRESS)
    graft_parser.add_argument('--force', action="store_true", help='Force overwrite the output directory, even if one already exists with the same name (default=False)', default=False)
    graft_parser.add_argument('--input_sequence_type', help='Specify whether the input sequence is "nucleotide" or "protein" sequence data (default=will attempt to auto-detect)', choices = ['protein', 'nucleotide'],  default=argparse.SUPPRESS)
    graft_parser.add_argument('--search_only', action="store_true", help='Stop after reads have been identified (default=False)', default=False)
    graft_parser.add_argument('--euk_check', action="store_true", help='Search whole sample for 18S and attempt to estimate relative percentage of Eukaryotes (default=False)', default=False)
    # metavar previously read 'reference_package' (copied from --graftm_package).
    graft_parser.add_argument('--output_directory', metavar='output_directory', type=str, help='Output directory name. If unspecified, this file will be named GraftM_proc', default=argparse.SUPPRESS)
    nucleotide_options = graft_parser.add_argument_group('nucleotide search-specific options')
    nucleotide_options.add_argument('--search_hmm_files', nargs='+', help='Specify .hmm files to use in search step', default=argparse.SUPPRESS)
    nucleotide_options.add_argument('--aln_hmm_file', help='Specify a single .hmm file to use in align step (default: the search_hmm_file if there is only 1)', default=argparse.SUPPRESS)
    nucleotide_options.add_argument('--euk_hmm_file', help='Specify the .hmm file to use in the check for euk contamination', default=argparse.SUPPRESS)
    nucleotide_options.add_argument('--check_total_euks', action="store_true", help='Search whole sample for 18S and attempt to estimate relative percentage of Eukaryotes (default=False)', default=False)
    protein_options = graft_parser.add_argument_group('protein search-specific options')
    min_orf_length_default = 96
    protein_options.add_argument('--min_orf_length', metavar='length', help='Minimum number of nucleotides in an open reading frame (default: %s)' % min_orf_length_default, default=min_orf_length_default, type=int)
    protein_options.add_argument('--restrict_read_length', metavar='length', help='Only use this many base pairs at the start of each sequence searched (default: no restriction)', type=int)


    # Filter pipeline - Remove rRNA genes from transcriptome dataset.
    filter_parser = subparsers.add_parser('filter',
                                         description='Remove rRNA genes from transcriptome dataset.',
                                         epilog='Joel Boyd, Ben Woodcroft')
    filter_parser.add_argument('--reads', metavar='<READS>', help='File to be filtered', required=True)
    filter_parser.add_argument('--filter_hmms', metavar='<FILTER_HMMS>', help='Directory with HMMs to use when filtering.', required=True)
    filter_parser.add_argument('--output', metavar='<OUTPUT>', help='Directory within which to place filtered reads, and sorted hits')


    # Assemble - Attempt to assemble as many full length genes as possible.
    assemble_parser = subparsers.add_parser('assemble',
                                            description='Attempt to assemble as many genes as possible.',
                                            epilog='Joel Boyd, Ben Woodcroft')
    assemble_parser.add_argument('--graft_run', metavar='File produced ', help='guppy file produced by graftM, comma separated (default=True)', default=True)
    assemble_parser.add_argument('--kmer', metavar='k-mer',  help='k-mer to use for assembly with velvet (default = 51)', default = '51')
    # Help text now names every accepted choice, including finishm.
    assemble_parser.add_argument('--assembly_type', metavar='type of assembly', help='phrap, velvet or finishm assembly', choices = ['phrap', 'velvet', 'finishm'], default='velvet')
    assemble_parser.add_argument('--finish', action = 'store_true', help='finish the velvet assembly with an overlap assembly (default=False)', default=False)

    # Manage - various tools that make the management of graftM output easier
    manage_parser = subparsers.add_parser('manage',
                                            description='Sort and access the reads from your GraftM run.',
                                            epilog='Joel Boyd, Ben Woodcroft')
    # The descriptive sentence used to sit in metavar while help carried text
    # copied from --graft_run; the sentence is now the help string.
    manage_parser.add_argument('--seq', metavar='lineage', help='The reads from this lineage will be returned (default=False)', default=False)
    manage_parser.add_argument('--profile', metavar='graftM_profile', help='GraftM profile produced by graftM graft run (required=True)', required=True)
    # NOTE(review): as with --placements_cutoff, a user-supplied value is a
    # string while the default is a float.
    manage_parser.add_argument('--cutoff', metavar='confidence_cutoff', help='Cutoff of placement confidence level (0.5 - 1), default = 0.75', default=0.75)
    manage_parser.add_argument('--non_cumil', action = 'store_true', help='Return all sequences exclusively at this taxonomic rank (i.e. do not return sequences that were classified to a higher resolution, default=False)', default=False)
    manage_parser.add_argument('--file', action = 'store_true', help='Lineages are in a file, one entry per line (default=False)', default=False)

    # Create - Create a gpkg from a sequence database and a hmm
    create_parser = subparsers.add_parser('create',
                                            description='Create GraftM packages from sequences, and a hmm',
                                            epilog='Joel Boyd, Ben Woodcroft')
    create_parser.add_argument('--sequences', metavar='sequences', help='Sequences with which to create a graftM package', default=None)
    create_parser.add_argument('--hmm', metavar='HMM', help='HMM used to create the GraftM package (default: Build HMM from sequences.)', default=None)
    create_parser.add_argument('--taxonomy', metavar='tax', help='File containing two tab separated columns, the first with the ID of the sequences, the second with the taxonomy string (GreenGenes taxonomy file format).', required=True)
    create_parser.add_argument('--alignment', metavar='aln', help='An alignment with which to build a custom HMM.', default=None)
    create_parser.add_argument('--tree', help='A tree with which to build the refpkg. WARNING: A HMM and alignment must be provided in conjunction with this flag', default=None)
    create_parser.add_argument('--log', help='A log file for the tree. WARNING: A HMM and alignment must be provided in conjunction with this flag', default=None)
    create_parser.add_argument('--output', metavar='o', help='Name of graftM package.', default=None)

    # Pathfinder - Find a whole pathway of genes in a metagenome/transcriptome.
    # Registered with no arguments of its own.
    pathfinder_parser = subparsers.add_parser('pathfinder',
                                              description='Find a whole pathway of genes in a metagenome/transcriptome.',
                                              epilog='Joel Boyd, Ben Woodcroft')


    # No arguments, or a leading -h/--help, shows the custom banner; anything
    # else is parsed and dispatched through Run.
    if len(sys.argv) == 1 or sys.argv[1] in ('-h', '--help'):
        phelp()
    else:
        args = parser.parse_args()
        Run(args).main()

