#!/home/travis/virtualenv/python3.6.3/bin/python
"""Simple commandline interface for parsing PDF to HTML."""
import argparse
import logging
import os
import pdftotree

def build_parser():
    """Build the argument parser for the pdftotree command line.

    Returns:
        argparse.ArgumentParser: parser accepting the positional ``pdf_file``
        plus the model, output, visualization, dry-run and verbosity options.
    """
    parser = argparse.ArgumentParser(
        description="""
        Script to extract tree structure from PDF files. Takes a PDF as input
        and outputs an HTML-like representation of the document's structure. By
        default, this conversion is done using heuristics. However, a model can
        be provided as a parameter to use a machine-learning-based approach.
        """,
        usage="%(prog)s [options] pdf_file")
    parser.add_argument(
        '-mt',
        '--model_type',
        type=str,
        default=None,
        choices=['vision', 'ml', None],
        help="Model type to use.  None (default) for heuristics approach.")
    parser.add_argument(
        '-m',
        '--model_path',
        type=str,
        default=None,
        help="Pretrained model, generated by extract_tables tool")
    parser.add_argument(
        'pdf_file',
        type=str,
        help="PDF file name for which tree structure needs to be extracted")
    parser.add_argument(
        '-o',
        '--output',
        type=str,
        help=
        "Path where tree structure should be saved. If none, HTML is printed to stdout."
    )
    # NOTE(review): this value is forwarded to pdftotree.parse as a string, so
    # "-f False" still yields a non-empty (truthy) string -- confirm how the
    # library interprets it before relying on it to disable the behaviour.
    parser.add_argument(
        '-f',
        '--favor_figures',
        type=str,
        help=
        "Whether figures must be favored over other parts such as tables and section headers",
        default="True")
    parser.add_argument(
        '-V',
        '--visualize',
        dest='visualize',
        action='store_true',
        help="Whether to output visualization images for the tree")
    parser.add_argument(
        '-d',
        '--dry-run',
        dest='dry_run',
        action='store_true',
        help="Run pdftotree, but do not save any output or print to console.")
    parser.add_argument(
        '-v',
        '--verbose',
        dest='verbose',
        action='store_true',
        help="Output INFO level logging.")
    parser.add_argument(
        '-vv',
        '--veryverbose',
        dest='debug',
        action='store_true',
        help="Output DEBUG level logging.")
    return parser


def resolve_log_level(verbose, debug):
    """Map the verbosity flags to a ``logging`` level.

    Args:
        verbose: True when ``-v/--verbose`` was given.
        debug: True when ``-vv/--veryverbose`` was given; wins over *verbose*.

    Returns:
        int: ``logging.DEBUG``, ``logging.INFO`` or (default) ``logging.ERROR``.
    """
    if debug:
        return logging.DEBUG
    if verbose:
        return logging.INFO
    return logging.ERROR


def configure_logging(log_level):
    """Attach a stream handler at *log_level* to the 'pdftotree' logger.

    Args:
        log_level: ``logging`` level applied to both the logger and handler.

    Returns:
        logging.Logger: the configured 'pdftotree' logger.
    """
    log = logging.getLogger('pdftotree')
    log.propagate = False  # prevent propagation to the root logger
    handler = logging.StreamHandler()
    log.setLevel(log_level)
    handler.setLevel(log_level)
    handler.setFormatter(
        logging.Formatter("[%(levelname)s] %(name)s - %(message)s"))
    log.addHandler(handler)
    return log


def outcome_message(result, output, dry_run):
    """Decide what to print once parsing has finished.

    Args:
        result: value returned by ``pdftotree.parse``.
        output: output path in effect for the run, or None.
        dry_run: True when ``--dry-run`` was requested.

    Returns:
        The object to print, or None when nothing should be printed. A truthy
        *result* is echoed unless this is a dry run. With a falsy *result*,
        the "HTML output to ..." confirmation is returned only when an output
        path is actually in effect, so the script never reports
        "HTML output to None".
    """
    if result:
        return None if dry_run else result
    if output is not None:
        return "HTML output to {}".format(output)
    return None


def main(argv=None):
    """Run the pdftotree command line interface.

    Args:
        argv: argument list to parse; None (default) lets argparse read
            ``sys.argv[1:]``.

    Raises:
        SystemExit: raised by argparse on invalid arguments, including a
            model type given without a model path (or vice versa) and a
            model path that does not exist.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    log_level = resolve_log_level(args.verbose, args.debug)

    # A model type and a model path only make sense as a pair.
    if bool(args.model_type) != bool(args.model_path):
        parser.error(
            "Both a model_type and a model_path must be provided together.")
    elif args.model_type and not os.path.exists(args.model_path):
        parser.error("A valid path to a pretrained model must be provided.")

    # Configure logging for this application
    configure_logging(log_level)

    if args.dry_run:
        print("This is just a dry run. No HTML will be output.")
        # Dropping the path keeps the run from being pointed at an output file.
        args.output = None

    # Call the main routine
    result = pdftotree.parse(args.pdf_file, args.output, args.model_type,
                             args.model_path, args.favor_figures,
                             args.visualize)

    message = outcome_message(result, args.output, args.dry_run)
    if message is not None:
        print(message)


if __name__ == '__main__':
    main()
