# Copyright (c) 2016, Mayo Clinic
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice, this
#     list of conditions and the following disclaimer.
#
#     Redistributions in binary form must reproduce the above copyright notice,
#     this list of conditions and the following disclaimer in the documentation
#     and/or other materials provided with the distribution.
#
#     Neither the name of the <ORGANIZATION> nor the names of its contributors
#     may be used to endorse or promote products derived from this software
#     without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
# OF THE POSSIBILITY OF SUCH DAMAGE.
import argparse
import json
import logging
import os
import re
import sys
from typing import List

from dirlistproc import DirectoryListProcessor
import jsonasobj
from pyjxslt import Gateway

# When this file is run directly as a script, append the parent of its own directory to sys.path --
# presumably so that the `dbgap` package imports that follow can be resolved.
if __name__ == "__main__":
    sys.path.append(os.path.join(os.path.join(os.getcwd(), os.path.dirname(__file__)), '..'))
from dbgap.xform_dbgap import xform_dbgap_dataset
from dbgap.json_to_rdf import json_to_rdf
from dbgap.constants import *
from dbgap.dbgap_study_information import biocaddie_json, get_study_information, StudyIdentifier
from dbgap.file_downloader import FileDownloader


# Names of the logging levels that are offered as choices for the --loglevel option
logNames = list(map(logging.getLevelName, (logging.DEBUG, logging.INFO, logging.WARNING, logging.ERROR)))

# Parent of the directory that holds this file; the per-study data directories are located beneath it
root_dir = os.path.join(os.path.dirname(__file__), os.pardir)


def json_needed(opts: argparse.Namespace) -> bool:
    """ Determine whether the json has to be (re)generated
    :param opts: input options
    :return: True if json was requested, or if rdf was requested together with a download
    """
    actions = opts.action
    if 'j' in actions:
        return True
    # rdf on its own works from existing json; rdf combined with a download needs new json
    return 'r' in actions and 'd' in actions


def setup_defaults(opts: argparse.Namespace) -> None:
    """ Fill in the options that were not supplied and create the working directories.
    Exits with status 2 if rdf is requested without a JSON-LD context URI.
    :param opts: input options - updated in place
    """
    # 'a' stands for every action
    if 'a' in opts.action:
        opts.action = ['d', 'j', 'r', 'a']

    opts.identifiers = StudyIdentifier(opts.studyid, opts.version, opts.pvalue)
    opts.data_dir = os.path.join(root_dir, 'data', opts.identifiers.studyid)

    # Directories that were not given default to subdirectories of the study's data directory
    opts.indir = opts.indir or os.path.join(opts.data_dir, 'xml')
    opts.outdir = opts.outdir or os.path.join(opts.data_dir, 'json')
    opts.rdfdir = opts.rdfdir or os.path.join(opts.data_dir, 'ttl')
    for directory in (opts.indir, opts.outdir):
        os.makedirs(directory, exist_ok=True)

    if 'r' in opts.action:
        if not opts.context:
            print("JSON-LD context server URI required if generating rdf", file=sys.stderr)
            sys.exit(2)
        os.makedirs(opts.rdfdir, exist_ok=True)

    # The XSLT gateway is only opened when XML actually has to be converted to json
    if json_needed(opts):
        if opts.port:
            opts.gw = Gateway(opts.port)
        else:
            opts.gw = Gateway()


def setup_logging(opts: argparse.Namespace) -> None:
    """ Configure logging to a file that is overwritten on each run
    :param opts: input options. If no logfile is supplied, opts.logfile is set to '<studyid>.log' in the
    'logs' directory next to this file's directory, and that directory is created if needed
    """
    if not opts.logfile:
        log_directory = os.path.join(os.path.dirname(__file__), '..', 'logs')
        os.makedirs(log_directory, exist_ok=True)
        opts.logfile = os.path.join(log_directory, opts.identifiers.studyid + '.log')
    log_format = '%(asctime)s  %(levelname)s - %(message)s'
    logging.basicConfig(filename=opts.logfile, level=opts.loglevel, filemode='w', format=log_format)


def get_data_tables(opts: argparse.Namespace) -> List[str]:
    """ List the data tables that have a data dictionary file in the input directory
    :param opts: input options - opts.indir is the directory that is scanned
    :return: one entry per regular file whose name ends in 'data_dict.xml'. A name of the form
    'phs<6 chars>.v<...>.pht<6 chars>.v<...>.<rest>' is reduced to its 'pht<6 chars>.v<...>' part; any other
    name is returned unchanged
    """
    table_id = re.compile(r'^phs.{6}\.v.*?\.(pht.{6}\.v.+?)\..*')
    tables = []
    for entry in os.listdir(opts.indir):
        if not entry.endswith('data_dict.xml'):
            continue
        if not os.path.isfile(os.path.join(opts.indir, entry)):
            continue
        tables.append(table_id.sub('\\1', entry))
    return tables


def write_as_rdf(jsonf: str, outfile: str, schema_uri: str):
    """ Convert JSON text into an RDF graph and save it in Turtle format
    :param jsonf: the JSON text to convert (the text itself, not a file name)
    :param outfile: name of the Turtle file to write
    :param schema_uri: JSON-LD context URI that is passed on to json_to_rdf
    """
    g = json_to_rdf(jsonasobj.loads(jsonf), schema_uri)
    turtle = g.serialize(format="turtle")
    # NOTE(review): g is presumably an rdflib Graph. Older rdflib releases return bytes from serialize()
    # while newer ones return str, so only decode when bytes were actually returned.
    if isinstance(turtle, bytes):
        turtle = turtle.decode('utf-8')
    # Write with an explicit UTF-8 encoding (matching the decode above) rather than the platform default,
    # and use a context manager so that the file is always flushed and closed.
    with open(outfile, 'w', encoding='utf-8') as outf:
        outf.write(turtle)


def proc_xml_file(infile: str, outfile: str, opts: argparse.Namespace) -> bool:
    """ Convert a downloaded XML file into json and/or rdf if requested
    :param infile: input XML file
    :param outfile: target json file. The rdf file gets the same base name with the '.json' suffix replaced
    by '.ttl' and is written to opts.rdfdir
    :param opts: User options
    :return: True if processing is successful, False if a problem occurred
    """
    # JSON text produced during this call; stays None when no conversion was needed
    json_text = None

    if json_needed(opts):
        with open(infile) as inf:
            jsoned = opts.gw.to_json(inf.read())
        # The gateway reports a failure with an empty result or a text that starts with "ERROR:"
        if not jsoned or jsoned.startswith("ERROR:"):
            logging.error("File %s:  %s" % (infile, jsoned if jsoned else "Unspecified conversion error"))
            return False
        jsonobj = xform_dbgap_dataset(jsonasobj.loads(jsoned), infile)
        json_text = json.dumps(json.loads(jsonobj._as_json), indent='   ')
        if 'j' in opts.action:
            with open(outfile, 'w') as outf:
                outf.write(json_text)

    # If doing json to rdf, convert the json into a graph
    if 'r' in opts.action:
        if json_text is None:
            # Nothing was converted in this run - work from the json file that is already on disk
            with open(outfile) as jsonf:
                json_text = jsonf.read()
        # Otherwise use the freshly generated json: when 'j' was not requested it was never written to
        # outfile, so reading outfile would fail or pick up stale content.
        rdf_fname = os.path.join(opts.rdfdir, re.sub(r'\.json$', '.ttl', os.path.basename(outfile)))
        write_as_rdf(json_text, rdf_fname, opts.context)

    return True


def download_study(opts: argparse.Namespace) -> None:
    """ Set up defaults and logging, download the study to the input (XML) directory if requested, and
    convert the study description to json and/or rdf as required
    :param opts: user options
    """
    setup_defaults(opts)
    setup_logging(opts)

    study_xml_path = os.path.join(opts.indir, 'StudyDescription.xml')
    biocaddie_path = os.path.join(opts.outdir, 'StudyDescription.biocaddie.json')

    if 'd' in opts.action:
        # Fetch the description before opening the target so a failed fetch doesn't leave a truncated file
        study_info = get_study_information(opts.identifiers)
        with open(study_xml_path, 'w') as xmlf:
            xmlf.write(study_info)
        dbgap_directory = STUDY_DIRECTORY_TEMPLATE % opts.identifiers.__dict__
        nfiles = FileDownloader(opts.ftproot).download_dir(dbgap_directory, opts.indir,
                                                           file_filtr=lambda s: s.endswith('data_dict.xml'))
        logging.info("%d files downloaded" % nfiles)

    # Process the study description if needed
    if json_needed(opts):
        logging.info("Writing StudyDescription.json")
        with open(study_xml_path) as xmlf:
            json_txt = opts.gw.to_json(xmlf.read())
        # NOTE(review): unlike proc_xml_file, the gateway result is not checked for an empty or "ERROR:"
        # response here - confirm whether that check is wanted for the study description as well.
        with open(os.path.join(opts.outdir, 'StudyDescription.json'), 'w') as jsonf:
            jsonf.write(json_txt)
        json_obj = jsonasobj.loads(json_txt)
        biocaddie_obj = biocaddie_json(opts.identifiers, json_obj, get_data_tables(opts))
        biocaddie_txt = biocaddie_obj._as_json_dumps()
        with open(biocaddie_path, 'w') as biocaddief:
            biocaddief.write(biocaddie_txt)
    if 'r' in opts.action:
        logging.info("Writing StudyDescription.ttl")
        with open(biocaddie_path) as biocaddief:
            json_txt = biocaddief.read()
        write_as_rdf(json_txt, os.path.join(opts.rdfdir, 'StudyDescription.ttl'), opts.context)


def addargs(parser: argparse.ArgumentParser) -> None:
    """ Add the application specific arguments to the basic directory list processor inputs
    :param parser: parser to add additional arguments to
    """
    parser.add_argument("studyid", help="DBGap study id (example: 774)", type=int)
    # With no action on the command line opts.action is the plain string 'a'; tests such as
    # "'a' in opts.action" give the same answer for that string as for a list of actions
    parser.add_argument("action", help="(d)ownload xml, convert to (j)son-ld, convert to (r)df, (a)ll (default)",
                        choices=['d', 'j', 'r', 'a'], nargs='*', default='a')
    parser.add_argument("-v", "--version", help="Study version number (default: 1)", type=int, default=1)
    parser.add_argument("-p", "--pvalue", help="Study participant set number (default: 1)", type=int, default=1)
    parser.add_argument("--ftproot", help="DBGap ftp server root (default: %s)" % DBGAP_FTP_SERVER,
                        default=DBGAP_FTP_SERVER)
    parser.add_argument("-r", "--rdfdir", help="Output RDF directory (default: data/<studyid>/ttl)")
    parser.add_argument("--logfile", help="Log file. Default is 'logs/<studyid>.log'")
    parser.add_argument("--loglevel", help="Logging level. Default is %s" % logging.getLevelName(logging.INFO),
                        default=logging.getLevelName(logging.INFO), choices=logNames)
    parser.add_argument("--port", help="XSLT gateway port", type=int)
    parser.add_argument("-c", "--context", help="Base URI for json-ld context if creating rdf")


def main(argv: List[str]) -> None:
    """
    Run the dbgap processor: parse the arguments, then convert the data dictionary files if requested
    :param argv: input options (see addargs + dirlistproc documentation)
    """
    dlp = DirectoryListProcessor(argv, "Download a dbGAP dataset and convert it to RDF ", 'xml', 'json',
                                 addargs=addargs, postparse=download_study)
    actions = dlp.opts.action
    # Nothing to convert unless json or rdf output was requested
    if not ('j' in actions or 'r' in actions):
        return

    nfiles, nsuccess = dlp.run(proc_xml_file, file_filter=lambda s: s.endswith('data_dict.xml'))
    nerrors = nfiles - nsuccess
    logging.info("Total files processed=%d Errors=%d" % (nfiles, nerrors))


# Script entry point: hand the command line arguments (without the program name) to main
if __name__ == '__main__':
    main(sys.argv[1:])
