#!/usr/bin/env python

__author__ = "Ben Woodcroft"
__copyright__ = "Copyright 2015-2016"
__credits__ = ["Ben Woodcroft"]
__license__ = "GPL3+"
__maintainer__ = "Ben Woodcroft"
__email__ = "b.woodcroft near uq.edu.au"
__status__ = "Development"

import argparse
import logging
import sys
import os
import math

sys.path = [os.path.join(os.path.dirname(os.path.realpath(__file__)),'..')] + sys.path

import singlem.sequence_database
import singlem.query_formatters
import singlem.pipe as pipe
from singlem.pipe import SearchPipe
from singlem.summariser import Summariser
from singlem.strain_summariser import StrainSummariser
from singlem.sequence_classes import SeqReader
from singlem.metagenome_otu_finder import MetagenomeOtuFinder
from singlem.package_creator import PackageCreator
from singlem.otu_table_collection import OtuTableCollection, StreamingOtuTableCollection
from singlem.clusterer import Clusterer
from singlem.appraiser import Appraiser
from singlem.querier import Querier
from singlem.sequence_database import SequenceDatabase
from singlem.chancer import Chancer
from singlem.regenerator import Regenerator
from singlem.taxonomy import Taxonomy
from singlem.renew import Renew
from singlem.singlem import HmmDatabase

DEFAULT_WINDOW_SIZE=60
GENUS_LEVEL_AVERAGE_IDENTITY = 0.89

def seqs(args):
    if args.alignment_type == 'aa':
        is_protein_alignment = True
    elif args.alignment_type == 'dna':
        is_protein_alignment = False
    else:
        raise Exception("Unexpected alignment type '%s'" % args.alignment_type)

    # Read in the fasta Alignment
    protein_alignment = SeqReader().alignment_from_alignment_file(args.alignment)
    logging.info("Read in %i aligned protein sequences e.g. %s %s" % (
        len(protein_alignment),
        protein_alignment[0].name,
        protein_alignment[0].seq))

    best_position = MetagenomeOtuFinder().find_best_window(
        protein_alignment,
        args.window_size,
        is_protein_alignment)
    logging.info("Found best start position %i" % best_position)

class bcolors:
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'

    # def disable(self):
    #     self.HEADER = ''
    #     self.OKBLUE = ''
    #     self.OKGREEN = ''
    #     self.WARNING = ''
    #     self.FAIL = ''
    #     self.ENDC = ''

class MyParser(argparse.ArgumentParser):
    FULL_HELP_FLAG = 'full_help'

    PIPE_DEFAULT_ASSIGNMENT_METHOD=pipe.PPLACER_ASSIGNMENT_METHOD

    def format_help(self):
        prog = self.prog
        detail_str = 'See %s --%s for further options and further detail.\n' % (
            prog,
            self.FULL_HELP_FLAG)
        splits = prog.split()
        if len(splits) == 2:
            subcommand = splits[1]
        else:
            subcommand = None
        if subcommand == 'pipe':
            help = '\n'+bcolors.OKGREEN+prog+': '+self.description+bcolors.ENDC+'''

'''+bcolors.HEADER+'Common usage:'+bcolors.ENDC+'''

With paired read input:

  singlem pipe --forward <fastq_or_fasta1> --reverse <fastq_or_fasta2> \\
    --otu_table <output.otu_table.tsv>

Or with other input sequence types:

  singlem pipe --sequences <input_fastq_or_fasta> \\
    --otu_table <output.otu_table.tsv>

'''+bcolors.HEADER+'Popular options:'+bcolors.ENDC+'''
  --threads N\t\tUse N threads.
  --output_extras\tOutput more detailed information in the OTU table.
  --assignment_method {%s,%s,%s}\t
\t\t\tSpecify taxonomic assignment method [default: %s].

%s
''' % (pipe.PPLACER_ASSIGNMENT_METHOD,
       pipe.DIAMOND_ASSIGNMENT_METHOD,
       pipe.DIAMOND_EXAMPLE_BEST_HIT_ASSIGNMENT_METHOD,
       self.PIPE_DEFAULT_ASSIGNMENT_METHOD,
       detail_str)
            return help

        elif subcommand == 'summarise':
            help = '\n'+bcolors.OKGREEN+prog+': '+self.description+bcolors.ENDC+'''

'''+bcolors.HEADER+'To convert an OTU table into a Krona HTML visualisation:'+bcolors.ENDC+'''

  singlem summarise --input_otu_tables otu_table.from_pipe.tsv \\
            --krona singlem.krona.html

'''+bcolors.HEADER+'To cluster an OTU table based on 89 percent sequence identity:'+bcolors.ENDC+'''

  singlem summarise --input_otu_tables otu_table.from_pipe.tsv \\
            --cluster --cluster_id 0.89 \\
            --output_otu_table clustered.otu_table.tsv

'''+bcolors.HEADER+'To concatenate several OTU tables together:'+bcolors.ENDC+'''

  singlem summarise --input_otu_tables \\
            otu_table1.from_pipe.tsv otu_table2.from_pipe.tsv \\
            --output_otu_table concatenated.otu_table.tsv

%s
''' % detail_str
            return help

        elif subcommand == 'appraise':
            help = '\n'+bcolors.OKGREEN+prog+': '+self.description+bcolors.ENDC+'''

'''+bcolors.HEADER+'Determine how much of a metagenome is represented by some genomes:'+bcolors.ENDC+'''

  singlem appraise --metagenome_otu_tables otu_table.from_pipe_on_reads.tsv \\
            --genome_otu_tables otu_table.from_pipe_on_genomes.tsv

'''+bcolors.HEADER+'Determine how much of a metagenome is represented by an assembly:'+bcolors.ENDC+'''

  singlem appraise --metagenome_otu_tables otu_table.from_pipe_on_reads.tsv \\
            --assembly_otu_tables otu_table.from_pipe_on_assembly.tsv

'''+bcolors.HEADER+\
    'Visualise which OTUs have been assembled or binned.'+bcolors.ENDC+ \
''' One marker gene is chosen
for visualisation, the one best represents the fraction binned, assembled or
otherwise. To interpret these visualisations, see the description in the README
at
https://github.com/wwood/singlem#appraising-assembly-and-genome-recovery-efforts

  singlem appraise --metagenome_otu_tables otu_table.from_pipe_on_reads.tsv \\
            --assembly_otu_tables otu_table.from_pipe_on_assembly.tsv \\
            --genome_otu_tables otu_table.from_pipe_on_genomes.tsv \\
            --plot singlem_appraise.svg

%s
''' % detail_str
            return help

        elif subcommand == 'makedb':
            help = '\n'+bcolors.OKGREEN+prog+': '+self.description+bcolors.ENDC+'''

'''+bcolors.HEADER+'Generate a queryable database from an OTU table:'+bcolors.ENDC+'''

  singlem makedb --db my.sdb --otu_table otu_table.from_pipe.tsv

%s
''' % detail_str
            return help

        elif subcommand == 'query':
            help = '\n'+bcolors.OKGREEN+prog+': '+self.description+bcolors.ENDC+'''

'''+bcolors.HEADER+'Query a database via an OTU sequence:'+bcolors.ENDC+'''

  singlem query --db my.sdb --query_sequence \\
            AAAGACAATCACAGCCGGCGTGGCCTGCTGATGTTGGTTGGCCAACGCAAGAAACTCCTC

'''+bcolors.HEADER+'Query a database with all entries in an OTU table:'+bcolors.ENDC+'''

  singlem query --db my.sdb --query_otu_table another.otu_table.tsv

'''+bcolors.HEADER+'Extract all OTUs assigned to Firmicutes:'+bcolors.ENDC+'''

  singlem query --db my.db --taxonomy Firmicutes

%s
''' % detail_str
            return help

        elif subcommand == 'renew':
            help = '\n'+bcolors.OKGREEN+prog+': '+self.description+bcolors.ENDC+'''

'''+bcolors.HEADER+'Example:'+bcolors.ENDC+'''

  singlem renew --input_archive_otu_table my.archive_otu_table_from_pipe.json \\
            --sequences <input_fastq_or_fasta> --otu_table <output.otu_table.tsv>

'''+bcolors.HEADER+'Note:'+bcolors.ENDC+'''

  The <input_fastq_or_fasta> must be the same as was used in the previous run
  of 'pipe'. Note that this mode should be used with care, especially because
  the singlem package sets used in each run may change in time.

  This 'renew' mode accepts all arguments that 'pipe' does.

%s
''' % detail_str
            return help

        else:
            return argparse.ArgumentParser.format_help(self)


if __name__ == '__main__':
    parent_parser = argparse.ArgumentParser(add_help=False)
    parent_parser.add_argument('--debug', help='output debug information', action="store_true")
    parent_parser.add_argument('--version', help='output version information and quit',  action='version', version=singlem.__version__)
    parent_parser.add_argument('--quiet', help='only output errors', action="store_true")

    parser = argparse.ArgumentParser(parents=[parent_parser])
    subparsers = parser.add_subparsers(title="Sub-commands", dest='subparser_name', parser_class=MyParser)
    subparser_name_to_parser = {}

    def new_subparser(subparsers, parser_name, parser_description):
        subpar = subparsers.add_parser(parser_name,
                                       description=parser_description,
                                       help=parser_description,
                                       epilog=__author__,
                                       parents=[parent_parser])
        subpar.add_argument('--%s' % MyParser.FULL_HELP_FLAG, action='store_true', default=False, help='display all help options')
        subparser_name_to_parser[parser_name] = subpar
        return subpar

    pipe_description = 'Generate an OTU table from raw sequences.'
    pipe_parser = new_subparser(subparsers, 'pipe', pipe_description)

    # Make a function here so the code can be re-used between pipe and renew
    def add_common_pipe_arguments(argument_group):
        argument_group.add_argument('--sequences', '--forward',
                                    required=True,
                                    nargs='+',
                                    metavar='sequence_file(s)',
                                    help='nucleotide sequence(s) to be searched')
        argument_group.add_argument('--reverse',
                                    nargs='+',
                                    metavar='sequence_file(s)',
                                    help='reverse reads to be searched')
        argument_group.add_argument('--otu_table', metavar='filename', help='output OTU table')
        current_default = 1
        argument_group.add_argument('--threads', type=int, metavar='num_threads', help='number of CPUS to use [default: %i]' % current_default, default=current_default)
        current_default = pipe.PPLACER_ASSIGNMENT_METHOD
        argument_group.add_argument('--assignment_method', choices=(pipe.PPLACER_ASSIGNMENT_METHOD,
                                                                 pipe.DIAMOND_ASSIGNMENT_METHOD,
                                                                 pipe.DIAMOND_EXAMPLE_BEST_HIT_ASSIGNMENT_METHOD),
                                         help='method of assigning taxonomy to sequences. Using \'%s\' means than an example hit ID is given instead of a taxonomic classification [default: %s]' % (
                                             pipe.DIAMOND_EXAMPLE_BEST_HIT_ASSIGNMENT_METHOD,
                                             current_default),
                                         default=current_default)
        argument_group.add_argument('--output_extras', action='store_true',
                                         help='give extra output for each sequence identified (e.g. the read(s) each OTU was generated from) [default: not set]',
                                         default=False)
    common_pipe_arguments = pipe_parser.add_argument_group('Common options')
    add_common_pipe_arguments(common_pipe_arguments)

    def add_less_common_pipe_arguments(argument_group):
        argument_group.add_argument('--archive_otu_table', metavar='filename', help='output OTU table in archive form for making DBs etc. [default: unused]')
        argument_group.add_argument('--working_directory', metavar='directory', help='work in this directory [default: a temporary directory]')
        argument_group.add_argument('--force', action='store_true', help='overwrite working directory if required [default: not set]')
        argument_group.add_argument('--output_jplace', metavar='filename', help='Output a jplace format file for each singlem package to a file starting with this string, each with one entry per OTU. Requires \'%s\' as the --assignment_method [default: unused]' % pipe.PPLACER_ASSIGNMENT_METHOD)
        argument_group.add_argument('--evalue', help='GraftM e-value cutoff [default: the GraftM default]')
        argument_group.add_argument('--min_orf_length',
                                    metavar='length',
                                    help='When predicting ORFs require this many base pairs uninterrupted by a stop codon [default: %i]' % SearchPipe.DEFAULT_MIN_ORF_LENGTH,
                                    type=int, default=SearchPipe.DEFAULT_MIN_ORF_LENGTH)
        argument_group.add_argument('--restrict_read_length',
                                    metavar='length',
                                    help='Only use this many base pairs at the start of each sequence searched [default: no restriction]',
                                    type=int)
        argument_group.add_argument('--filter_minimum_protein',
                                    metavar='length',
                                    help='Ignore reads aligning in less than this many positions to each protein HMM [default: %i]' % SearchPipe.DEFAULT_FILTER_MINIMUM_PROTEIN,
                                    type=int, default=SearchPipe.DEFAULT_FILTER_MINIMUM_PROTEIN)
        argument_group.add_argument('--filter_minimum_nucleotide',
                                    metavar='length',
                                    help='Ignore reads aligning in less than this many positions to each nucleotide HMM [default: %i]' % SearchPipe.DEFAULT_FILTER_MINIMUM_NUCLEOTIDE,
                                    type=int, default=SearchPipe.DEFAULT_FILTER_MINIMUM_NUCLEOTIDE)
        argument_group.add_argument('--include_inserts', action='store_true',
                                    help='print the entirety of the sequences, not just the aligned nucleotides [default: not set]', default=False)
        argument_group.add_argument('--known_otu_tables', nargs='+',
                                    help='OTU tables previously generated with trusted taxonomies for each sequence [default: unused]')
        argument_group.add_argument('--singlem_packages', nargs='+', help='SingleM packages to use [default: use the default set]')
        argument_group.add_argument('--no_assign_taxonomy', action='store_true',
                                    help='Do not assign any taxonomy except for those already known [default: not set]',
                                    default=False)
        argument_group.add_argument('--known_sequence_taxonomy', metavar='FILE',
                                    help='A 2-column "sequence<tab>taxonomy" file specifying some sequences that have known taxonomy [default: unused]')
    less_common_pipe_arguments = pipe_parser.add_argument_group('Less common options')
    add_less_common_pipe_arguments(less_common_pipe_arguments)

    seqs_description = 'Find the best window for a SingleM package.'
    seqs_parser = new_subparser(subparsers, 'seqs', seqs_description)
    seqs_parser.add_argument('--alignment', metavar='aligned_fasta', help="Protein sequences hmmaligned and converted to fasta format with seqmagick", required=True)
    seqs_parser.add_argument('--alignment_type', metavar='type', help="alignment is 'aa' or 'dna'", required=True)
    seqs_parser.add_argument('--window_size', metavar='INT', help='Number of nucleotides to use in continuous window', default=DEFAULT_WINDOW_SIZE, type=int)

    makedb_description = 'Create a searchable database from an OTU table'
    makedb_parser = new_subparser(subparsers, 'makedb', makedb_description)
    makedb_parser.add_argument('--archive_otu_tables', nargs='+', help="Make a db from these archive tables")
    makedb_parser.add_argument('--otu_tables', nargs='+', help="Make a db from these OTU tables")
    makedb_parser.add_argument('--db', '--db_path', help="Name of database to create e.g. tundra.sdb", required=True)
    makedb_parser.add_argument('--clustering_divergence', type=int, help="Cluster DB to reduce RAM usage (no loss i precision)", default=0)

    query_description = 'Find closely related sequences in a database.'
    query_parser = new_subparser(subparsers, 'query', query_description)
    query_db_args = query_parser.add_argument_group('DB/Subject arguments (one is required)')
    query_db_args.add_argument('--db', '--db_path', help="Output from 'makedb' mode")
    query_db_args.add_argument('--subject_otu_tables', nargs='+', metavar='file',
                               help='Pairwise align queries to this otu table (works with inserts, but is slow)')
    query_otu_args = query_parser.add_argument_group('Database querying by OTU sequence')
    query_otu_args.add_argument('--query_sequence', metavar='sequence', help="Sequence to use as a query")
    query_otu_args.add_argument('--query_otu_table', metavar='file', help="Query the database with all sequences in this OTU table")
    query_otu_args.add_argument('--query_fasta', metavar='file', help="Query the database with all sequences in this FASTA file")
    query_otu_args.add_argument('--max_divergence', metavar='INT', help="Report sequences less than or equal to this divergence", default=4, type=int)
    query_other_args = query_parser.add_argument_group('Other database extraction methods')
    query_other_args.add_argument('--sample_names', metavar='name', help='Print all OTUs from these samples', nargs='+')
    query_other_args.add_argument('--taxonomy', metavar='name', help='Print all OTUs assigned a taxonomy including this string e.g. \'Archaea\'')
    query_other_args.add_argument('--dump', action='store_true', help='Print all OTUs in the DB')

    summarise_description = 'Summarise and transform OTU tables.'
    summarise_parser = new_subparser(subparsers, 'summarise', summarise_description)
    summarise_io_args = summarise_parser.add_argument_group('input')
    summarise_io_args.add_argument('--input_archive_otu_tables', nargs='+', help="Summarise these tables")
    summarise_io_args.add_argument('--input_otu_tables', nargs='+', help="Summarise these tables")
    summarise_transformation_args = summarise_parser.add_argument_group('transformation')
    summarise_transformation_args.add_argument('--cluster', action='store_true', help="Apply sequence clustering to the OTU table")
    summarise_transformation_args.add_argument('--cluster_id', type=float, help="Sequence clustering identity cutoff if --cluster is used", default=GENUS_LEVEL_AVERAGE_IDENTITY)
    summarise_transformation_args.add_argument('--taxonomy', help="Restrict analysis to OTUs that have this taxonomy (exact taxonomy or more fully resolved)")
    summarise_transformation_args.add_argument('--rarefied_output_otu_table', help="Output rarefied output OTU table, where each gene and sample combination is rarefied")
    summarise_transformation_args.add_argument('--number_to_choose', type=int, help="Rarefy using this many sequences. Sample/gene combinations with an insufficient number of sequences are ignored with a warning [default: maximal number such that all samples have sufficient counts]")
    summarise_transformation_args.add_argument('--collapse_coupled', action='store_true', help="Merge forward and reverse read OTU tables into a unified table. Sample names of coupled reads must end in '1' and '2' respectively. Read names are ignored, so that if the forward and reverse from a pair contain the same OTU sequence, they will each count separately.")
    summarise_output_args = summarise_parser.add_argument_group('output')
    summarise_output_args.add_argument('--output_otu_table', help="Output combined OTU table to this file")
    summarise_output_args.add_argument('--output_extras', action='store_true', help="Output extra information in the standard output OTU table", default=False)
    summarise_output_args.add_argument('--krona', help="Name of krona file to generate")
    summarise_output_args.add_argument('--wide_format_otu_table', help="Name of output species by site CSV file")
    summarise_output_args.add_argument('--strain_overview_table', help="Name of output strains table to generate")
    summarise_output_args.add_argument('--unifrac_by_otu', help="Output UniFrac format file where entries are OTU sequences")
    summarise_output_args.add_argument('--unifrac_by_taxonomy', help="Output UniFrac format file where entries are taxonomies (generally used for phylogeny-driven beta diversity when pipe was run with '--assignment_method diamond_example')")
    summarise_output_args.add_argument('--biom_prefix', help="Output BIOM format files, one for each marker gene detected")
    summarise_output_args.add_argument('--clustered_output_otu_table', help="Output an OTU table with extra information about the clusters")

    renew_description = 'Reannotate an OTU table with an updated taxonomy'
    renew_parser = new_subparser(subparsers, 'renew', renew_description)
    renew_input_args = renew_parser.add_argument_group('input')
    renew_input_args.add_argument('--input_archive_otu_tables', nargs='+', help="Renew these table(s)", required=True)
    renew_common = renew_input_args.add_argument_group("Common arguments in shared with 'pipe'")
    add_common_pipe_arguments(renew_common)
    renew_less_common = renew_input_args.add_argument_group("Less common arguments shared with 'pipe'")
    add_less_common_pipe_arguments(renew_less_common)

    create_description = 'Create a SingleM package.'
    create_parser = new_subparser(subparsers, 'create', create_description)
    create_parser.add_argument('--input_graftm_package', metavar="PATH", help="input package", required=True)
    create_parser.add_argument('--output_singlem_package', metavar="PATH", help="output package", required=True)
    create_parser.add_argument('--hmm_position', metavar="INTEGER", help="position in the GraftM alignment HMM where the SingleM window starts", required=True, type=int)
    create_parser.add_argument('--window_size', metavar="INTEGER", help="length of residues in the window, counting only those that match the HMM", required=True, type=int)
    create_parser.add_argument('--force', action='store_true', help='overwrite output path if it already exists')

    get_tree_description = 'Extract path to Newick tree file in a SingleM package.'
    get_tree_parser = new_subparser(subparsers, 'get_tree', get_tree_description)
    get_tree_parser.add_argument('--singlem_packages', nargs='+', help='SingleM packages to use [default: use the default set]')

    regenerate_description = 'Update a SingleM package with a new GraftM package (expert mode).'
    regenerate_parser = new_subparser(subparsers, 'regenerate', regenerate_description)
    regenerate_parser.add_argument('--input_singlem_package', metavar="PATH", help="input package", required=True)
    regenerate_parser.add_argument('--output_singlem_package', metavar="PATH", help="output package", required=True)
    regenerate_parser.add_argument('--working_directory', required=True)
    regenerate_parser.add_argument('--euk_sequences', required=True)
    regenerate_parser.add_argument('--euk_taxonomy', required=True)
    regenerate_parser.add_argument('--intermediate_archaea_graftm_package', required=True)
    regenerate_parser.add_argument('--intermediate_bacteria_graftm_package', required=True)
    regenerate_parser.add_argument('--input_taxonomy', required=True)
    regenerate_parser.add_argument('--type_strains_list_file', required=True)

    appraise_description = 'How much of the metagenome do the genomes or assembly represent?'
    appraise_parser = new_subparser(subparsers, 'appraise', appraise_description)
    appraise_otu_table_options = appraise_parser.add_argument_group('Input OTU table options')
    appraise_otu_table_options.add_argument('--metagenome_otu_tables', nargs='+', help="output of 'pipe' run on metagenomes", required=True)
    appraise_otu_table_options.add_argument('--genome_otu_tables', nargs='+', help="output of 'pipe' run on genomes")
    appraise_otu_table_options.add_argument('--assembly_otu_tables', nargs='+', help="output of 'pipe' run on assembled sequence")
    appraise_inexact_options = appraise_parser.add_argument_group('Inexact appraisal options')
    appraise_inexact_options.add_argument('--imperfect', action='store_true', help="use sequence searching to account for genomes that are similar to those found in the metagenome", default=False)
    appraise_inexact_options.add_argument('--sequence_identity', type=float, help="sequence identity cutoff to use if --imperfect is specified", default=GENUS_LEVEL_AVERAGE_IDENTITY)
    appraise_plot_group = appraise_parser.add_argument_group("Plotting-related options")
    appraise_plot_group.add_argument('--plot', help='Output plot SVG filename (marker chosen automatically unless --plot_marker is also specified)', default=None)
    appraise_plot_group.add_argument('--plot_marker', help='Marker gene to plot OTUs from', default=None)
    appraise_plot_group.add_argument('--plot_basename', help="Plot visualisation of appraisal results from all markers to this basename (one SVG per marker)", default=None)
    appraise_otu_table_group = appraise_parser.add_argument_group('Output summary OTU tables')
    appraise_otu_table_group.add_argument('--output_binned_otu_table', help="output OTU table of binned populations", default=None)
    appraise_otu_table_group.add_argument('--output_unbinned_otu_table', help="output OTU table of assembled but not binned populations", default=None)
    appraise_otu_table_group.add_argument('--output_assembled_otu_table', help="output OTU table of all assembled populations", default=None)
    appraise_otu_table_group.add_argument('--output_unaccounted_for_otu_table', help="Output OTU table of populations not accounted for", default=None)

    chance_description = 'Is a lineage likely to be recovered through metagenome assembly / binning?'
    chance_parser = new_subparser(subparsers, 'chance', chance_description)
    chance_parser.add_argument('--otu_tables', nargs='+', help="output of 'pipe' run on metagenome reads", required=True)
    chance_parser.add_argument('--taxonomy', help="target taxonomy", required=True)

    def validate_pipe_args(args):
        if not args.otu_table and not args.archive_otu_table:
            raise Exception("At least one of --otu_table or --archive_otu_table must be specified")
        if args.output_jplace and args.assignment_method != pipe.PPLACER_ASSIGNMENT_METHOD:
            raise Exception("If --output_jplace is specified, then --assignment_method must be set to %s" % pipe.PPLACER_ASSIGNMENT_METHOD)
        if args.output_jplace and args.known_otu_tables:
            raise Exception("Currently --output_jplace and --known_otu_tables are incompatible")
        if args.output_jplace and args.no_assign_taxonomy:
            raise Exception("Currently --output_jplace and --no_assign_taxonomy are incompatible")
        if args.known_sequence_taxonomy and not args.no_assign_taxonomy:
            raise Exception(
                "Currently --known_sequence_taxonomy requires --no_assign_taxonomy to be set also")
        if args.reverse and args.output_jplace:
            raise Exception("Currently jplace_output cannot be used with --reverse")

    if (len(sys.argv) == 1 or sys.argv[1] == '-h' or sys.argv[1] == '--help'):
        print ''
        print '                ...::: SingleM v' + singlem.__version__ + ' :::...'''
        print '\n  General usage:'
        print '    pipe         -> %s' % pipe_description
        print '    summarise    -> %s' % summarise_description
        print '    renew        -> %s' % renew_description

        print '\n  Databases (of OTU sequences):'
        print '    makedb       -> %s' % makedb_description
        print '    query        -> %s' % query_description

        print '\n  Assembly and binning:'
        print '    appraise     -> %s' % appraise_description

        print '\n  Packages (to search with):'
        print '    seqs         -> %s' % seqs_description
        print '    create       -> %s' % create_description
        print '    get_tree     -> %s' % get_tree_description
        print '    regenerate   -> %s' % regenerate_description

        print '\n  Use singlem <command> -h for command-specific help.\n'\
            '  Some commands also have an extended --full_help flag.\n'
        sys.exit(0)
    else:
        # Determine whether --full_help was specified before argument parsing.
        if '--%s' % MyParser.FULL_HELP_FLAG in sys.argv and \
           sys.argv[1] in subparser_name_to_parser:
            subcommand = sys.argv[1]
            subparser = subparser_name_to_parser[subcommand]
            print(argparse.ArgumentParser.format_help(subparser))
            sys.exit(0)
        else:
            args = parser.parse_args()

    if args.debug:
        loglevel = logging.DEBUG
    elif args.quiet:
        loglevel = logging.ERROR
    else:
        loglevel = logging.INFO
    logging.basicConfig(level=loglevel, format='%(asctime)s %(levelname)s: %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')

    if args.subparser_name == 'seqs':
        seqs(args)
    elif args.subparser_name=='pipe':
        validate_pipe_args(args)
        singlem.pipe.SearchPipe().run(
            sequences = args.sequences,
            reverse_read_files = args.reverse,
            otu_table = args.otu_table,
            archive_otu_table = args.archive_otu_table,
            threads = args.threads,
            known_otu_tables = args.known_otu_tables,
            assignment_method = args.assignment_method,
            output_jplace = args.output_jplace,
            evalue = args.evalue,
            min_orf_length = args.min_orf_length,
            restrict_read_length = args.restrict_read_length,
            filter_minimum_protein = args.filter_minimum_protein,
            filter_minimum_nucleotide = args.filter_minimum_nucleotide,
            output_extras = args.output_extras,
            include_inserts = args.include_inserts,
            working_directory = args.working_directory,
            force = args.force,
            singlem_packages = args.singlem_packages,
            assign_taxonomy = not args.no_assign_taxonomy,
            known_sequence_taxonomy = args.known_sequence_taxonomy)

    elif args.subparser_name=='renew':
        validate_pipe_args(args)

        otus = StreamingOtuTableCollection()
        for o in args.input_archive_otu_tables:
            otus.add_archive_otu_table_file(o)

        Renew().renew(
            input_otus=otus,

            sequences = args.sequences,
            otu_table = args.otu_table,
            archive_otu_table = args.archive_otu_table,
            threads = args.threads,
            known_otu_tables = args.known_otu_tables,
            assignment_method = args.assignment_method,
            output_jplace = args.output_jplace,
            evalue = args.evalue,
            min_orf_length = args.min_orf_length,
            restrict_read_length = args.restrict_read_length,
            filter_minimum_protein = args.filter_minimum_protein,
            filter_minimum_nucleotide = args.filter_minimum_nucleotide,
            output_extras = args.output_extras,
            include_inserts = args.include_inserts,
            working_directory = args.working_directory,
            force = args.force,
            singlem_packages = args.singlem_packages,
            assign_taxonomy = not args.no_assign_taxonomy,
            known_sequence_taxonomy = args.known_sequence_taxonomy)

    elif args.subparser_name=='makedb':
        if not args.otu_tables and not args.archive_otu_tables:
            raise Exception("Making a database requires input OTU tables or archive tables")
        otus = StreamingOtuTableCollection()
        if args.otu_tables:
            for o in args.otu_tables:
                otus.add_otu_table_file(o)
        if args.archive_otu_tables:
            for o in args.archive_otu_tables:
                otus.add_archive_otu_table_file(o)
        singlem.sequence_database.SequenceDatabase.create_from_otu_table\
            (args.db, otus, args.clustering_divergence)
    elif args.subparser_name=='query':
        querier = Querier()
        if args.db and args.subject_otu_tables:
            raise Exception("Cannot specify both --db and --subject_otu_tables")

        if args.db:
            if args.dump:
                logging.debug("Dumping SingleM database {}".format(args.db))
                SequenceDatabase.dump(args.db)
            elif args.sample_names or args.taxonomy:
                if args.taxonomy and args.sample_names:
                    raise Exception("Cannot specify both sample names and taxonomy")
                querier.print_samples(
                    db=args.db,
                    sample_names = args.sample_names,
                    taxonomy = args.taxonomy,
                    output_io=sys.stdout)
            else:
                args.threads = 1
                if args.db:
                    querier.query(
                        db = args.db,
                        query_sequence = args.query_sequence,
                        max_divergence = args.max_divergence,
                        output_style = 'sparse',#args.otu_table_type, # There is only sparse type atm.
                        query_otu_table = args.query_otu_table,
                        query_fasta = args.query_fasta,
                        num_threads = args.threads)
        elif args.subject_otu_tables:
            if args.taxonomy:
                raise Exception("--taxonomy only works with --db")
            if args.sample_names:
                raise Exception("--sample_names only works with --db")
            if args.dump:
                raise Exception("--dump only works with --db")
            otus = OtuTableCollection()
            for o in args.subject_otu_tables:
                otus.add_otu_table(open(o))
            querier.query_subject_otu_table(
                query_sequence = args.query_sequence,
                max_divergence = args.max_divergence,
                query_otu_table = args.query_otu_table,
                query_fasta = args.query_fasta,
                subject_otu_collection = otus,
                output_style = 'sparse')# There is only sparse type atm.
        else:
            raise Exception("Either --db or --subject_otu_tables must be specified.")
    elif args.subparser_name == 'summarise':
        num_output_types = 0
        if args.strain_overview_table: num_output_types += 1
        if args.krona: num_output_types += 1
        if args.unifrac_by_otu: num_output_types += 1
        if args.unifrac_by_taxonomy: num_output_types += 1
        if args.biom_prefix: num_output_types += 1
        if args.output_otu_table: num_output_types += 1
        if args.clustered_output_otu_table: num_output_types += 1
        if args.rarefied_output_otu_table: num_output_types += 1
        if args.wide_format_otu_table: num_output_types += 1
        if num_output_types != 1:
            raise Exception("Exactly 1 output type must be specified, sorry, %i were provided" % num_output_types)
        if not args.input_otu_tables and not args.input_archive_otu_tables:
            raise Exception("Summary requires input OTU tables or archive tables")

        otus = OtuTableCollection()
        otus.set_target_taxonomy_by_string(args.taxonomy)
        if args.input_otu_tables:
            for o in args.input_otu_tables:
                otus.add_otu_table(open(o))
        if args.input_archive_otu_tables:
            for o in args.input_archive_otu_tables:
                otus.add_archive_otu_table(open(o))

        if args.cluster:
            logging.info("Clustering OTUs with clustering identity %f.." % args.cluster_id)
            o2 = OtuTableCollection()
            o2.otu_table_objects = [list(Clusterer().each_cluster(otus, args.cluster_id))]
            otus = o2
            logging.info("Finished clustering")

        if args.collapse_coupled:
            if not args.output_otu_table:
                raise Exception("Collapsing is currently only implemented for regular OTU table outputs.")
            logging.info("Collapsing forward and reverse read OTU tables")
            o2 = OtuTableCollection()
            o2.otu_table_objects.append(otus.collapse_coupled())
            otus = o2

        if args.krona:
            Summariser.summarise(
                table_collection = otus,
                krona_output = args.krona)
        elif args.unifrac_by_otu:
            Summariser.write_unifrac_by_otu_format_file(
                table_collection = otus,
                unifrac_output_prefix = args.unifrac_by_otu)
        elif args.unifrac_by_taxonomy:
            Summariser.write_unifrac_by_taxonomy_format_file(
                table_collection = otus,
                unifrac_output_prefix = args.unifrac_by_taxonomy)
        elif args.biom_prefix:
            Summariser.write_biom_otu_tables(
                table_collection = otus,
                biom_output_prefix = args.biom_prefix)
        elif args.output_otu_table:
            Summariser.write_otu_table(
                table_collection = otus,
                output_table_io = open(args.output_otu_table,'w'),
                output_extras = args.output_extras)
        elif args.wide_format_otu_table:
            Summariser.write_wide_format_otu_table(
                table_collection = otus,
                output_table_io = open(args.wide_format_otu_table,'w'))
        elif args.strain_overview_table:
            StrainSummariser().summarise_strains(
                table_collection = otus,
                output_table_io = open(args.strain_overview_table,'w'))
        elif args.clustered_output_otu_table:
            if not args.cluster:
                raise Exception("If --clustered_output_otu_table is set, then clustering (--cluster) must be applied")
            Summariser.write_clustered_otu_table(
                table_collection = otus,
                output_table_io = open(args.clustered_output_otu_table,'w'))
        elif args.rarefied_output_otu_table:
            Summariser.write_rarefied_otu_table(
                table_collection = otus,
                output_table_io = open(args.rarefied_output_otu_table,'w'),
                number_to_choose = args.number_to_choose)

        else: raise Exception("Programming error")
        logging.info("Finished")

    elif args.subparser_name == 'create':
        PackageCreator().create(input_graftm_package = args.input_graftm_package,
                                output_singlem_package = args.output_singlem_package,
                                hmm_position = args.hmm_position,
                                window_size = args.window_size,
                                force = args.force)
    elif args.subparser_name == 'appraise':
        appraiser = Appraiser()

        metagenomes = OtuTableCollection()
        for table in args.metagenome_otu_tables:
            with open(table) as f:
                metagenomes.add_otu_table(f)

        if args.genome_otu_tables:
            genomes = OtuTableCollection()
            for table in args.genome_otu_tables:
                with open(table) as f:
                    genomes.add_otu_table(f)
        else:
            genomes = None
        if args.assembly_otu_tables:
            assemblies = OtuTableCollection()
            for table in args.assembly_otu_tables:
                with open(table) as f:
                    assemblies.add_otu_table(f)
        else:
            assemblies = None

        if genomes is None and assemblies is None:
            raise Exception("Appraise must be run with genomes and/or assemblies.")

        app = appraiser.appraise(genome_otu_table_collection=genomes,
                                 metagenome_otu_table_collection=metagenomes,
                                 assembly_otu_table_collection=assemblies,
                                 sequence_identity=(args.sequence_identity if args.imperfect else None))

        if args.output_binned_otu_table:
            output_binned_otu_table_io = open(args.output_binned_otu_table,'w')
        if args.output_unbinned_otu_table:
            output_unbinned_otu_table_io = open(args.output_unbinned_otu_table,'w')
        if args.output_assembled_otu_table:
            output_assembled_otu_table_io = open(args.output_assembled_otu_table,'w')
        if args.output_unaccounted_for_otu_table:
            output_unaccounted_for_otu_table_io = open(args.output_unaccounted_for_otu_table,'w')

        if args.plot_basename or args.plot:
            if args.plot and args.plot_basename:
                raise Exception("Cannot specify both --plot and --plot_basename")
            if args.plot:
                app.plot(
                    cluster_identity=args.sequence_identity,
                    doing_assembly=assemblies is not None,
                    doing_binning=genomes is not None,
                    gene_to_plot=args.plot_marker,
                    output_svg=args.plot)
            else:
                app.plot(
                    output_svg_base=args.plot_basename,
                    cluster_identity=args.sequence_identity,
                    doing_assembly=assemblies is not None,
                    doing_binning=genomes is not None)

        appraiser.print_appraisal(
            app,
            doing_binning = genomes is not None,
            doing_assembly = assemblies is not None,
            binned_otu_table_io=output_binned_otu_table_io if args.output_binned_otu_table else None,
            unbinned_otu_table_io=output_unbinned_otu_table_io if args.output_unbinned_otu_table else None,
            assembled_otu_table_io=output_assembled_otu_table_io if args.output_assembled_otu_table else None,
            unaccounted_for_otu_table_io=output_unaccounted_for_otu_table_io \
            if args.output_unaccounted_for_otu_table else None)
        if args.output_binned_otu_table: output_binned_otu_table_io.close()
        if args.output_unbinned_otu_table: output_unbinned_otu_table_io.close()
        if args.output_assembled_otu_table: output_assembled_otu_table_io.close()
        if args.output_unaccounted_for_otu_table: output_unaccounted_for_otu_table_io.close()

    elif args.subparser_name == 'regenerate':
        Regenerator().regenerate(
            input_singlem_package = args.input_singlem_package,
            output_singlem_package = args.output_singlem_package,
            working_directory = args.working_directory,
            euk_sequences = args.euk_sequences,
            euk_taxonomy = args.euk_taxonomy,
            intermediate_archaea_graftm_package = args.intermediate_archaea_graftm_package,
            intermediate_bacteria_graftm_package = args.intermediate_bacteria_graftm_package,
            input_taxonomy = args.input_taxonomy,
            type_strains_list_file = args.type_strains_list_file)

    elif args.subparser_name == 'get_tree':
        hmm_db = HmmDatabase(args.singlem_packages)
        print("marker\ttree_file")
        for pkg in hmm_db.singlem_packages:
            print("{}\t{}".format(
                pkg.graftm_package_basename(),
                os.path.abspath(
                    pkg.graftm_package().reference_package_tree_path())))

    elif args.subparser_name == 'chance':
        chancer = Chancer()
        metagenomes = OtuTableCollection()
        for table in args.otu_tables:
            with open(table) as f:
                metagenomes.add_otu_table(f)
        chancer.run_and_print(
            metagenomes = metagenomes,
            target_taxonomy = Taxonomy.split_taxonomy(args.taxonomy))
    else:
        raise Exception("Programming error")


