#!/usr/bin/env python

import getopt
import os
import string
import subprocess
import sys
import time

try:
    import urllib2
except ImportError:
    # Python 3: urlopen lives in urllib.request
    import urllib.request as urllib2

class CfchecksWrapper(object):

    # NOTE: the docstring below is also the usage text printed by usage(),
    # so it is written for command-line users rather than for developers.
    """
    cf-checker [options] file1 [file2...]
    cf-checker [options] --update_only
    cf-checker -h|--help

    Description:
      A wrapper to the CF checker, which will keep local copies of the area
      types and standard name tables, and fetch fresh copies as required if
      the local copies are too old.

      If --update_only is specified, then it will update the area
      types and standard name tables without checking any data files.
      In this case the tables are updated regardless of their timestamp
      (though this can be overridden with the -t flag - see below).

      Other options:

          -a|--area_types <path or url>:
              location of the CF area types table

          -s|--cf_standard_names <path or url>:
              location of the CF standard name table

          -u|--udunits <path>:
              location of the udunits2.xml file

          -v|--version <version>:
              CF version to check against

          -t|--max_age <days>
              maximum allowable age of the tables before downloading
              a fresh copy (floating point number permitted).
              The default is 1 day when checking data files,
              and is 0 if --update_only is specified.

          -d|--spool_dir <path>:
              directory to use for local copies of the area-types
              and standard-name tables
              default = /var/spool/cf-checker

          --verbose:
              increase verbosity

          -h|--help:
              prints this help text and the help text for the
              underlying checker

      Note that the datafiles to check are optional when --update_only is
      given: the wrapper can then be run without them to just download the
      table files as necessary.

      On a multi-user system, it may be desirable to remove world write
      permission from the spool directory and to set up a cron job to
      run this wrapper with --update_only as a user that has write
      access to it.
"""

    def __init__(self,
                 spool_dir='/var/spool/cf-checker',
                 standard_names_key='CF_STANDARD_NAMES',
                 standard_names_url='http://cfconventions.org/Data/cf-standard-names/current/src/cf-standard-name-table.xml',
                 area_types_key='CF_AREA_TYPES',
                 area_types_url='http://cfconventions.org/Data/area-type-table/current/src/area-type-table.xml',
                 udunits_key='UDUNITS',
                 udunits_path='/usr/share/udunits/udunits2.xml',
                 checker_command='cfchecks',
                 default_max_table_age=1.,
                 verbose=False):
        """Store the default settings.

        The ``*_key`` arguments are the names of environment variables that
        set_values_from_env() consults to override the matching ``*_url`` /
        ``*_path`` value.  ``default_max_table_age`` is in days and is used
        when no -t/--max_age option is given and --update_only is not set.
        """
        self.spool_dir = spool_dir
        self.standard_names_key = standard_names_key
        self.standard_names_url = standard_names_url
        self.area_types_key = area_types_key
        self.area_types_url = area_types_url
        self.default_max_table_age = default_max_table_age
        self.max_table_age = None
        # Must be a plain string: it is used as an os.environ key.
        self.udunits_key = udunits_key
        self.udunits_path = udunits_path
        self.checker_command = checker_command
        self.verbose = verbose
        self.cf_version = None
        # Filled in by parse_args() / set_table_paths().
        self.files_to_check = []
        self.standard_names_path = None
        self.area_types_path = None

    def main(self):
        """Download table files as required and run the checker.

        Always terminates via sys.exit(): status 0 if the tables were
        refreshed without error and every checker run returned 0,
        otherwise status 1.
        """
        # allow values to be passed in environment (as also understood
        # directly by the wrapper) but for these to be overridden on the
        # command line
        exit_value = 0
        self.set_values_from_env()
        self.parse_args(sys.argv[1:])
        try:
            self.refresh_tables()
        except (IOError, OSError) as err:
            print("Warning: error when trying to refresh local copies of xml tables: %s"
                  % self._describe_error(err))
            # Be talkative from here on so the user can see what is attempted.
            self.verbose = True
            exit_value = 1
            if self.files_to_check:
                print("\nTrying to run checker anyway...")
        for data_file in self.files_to_check:
            status = self.run_checker(data_file, cf_version=self.cf_version)
            if status != 0:
                exit_value = 1
        sys.exit(exit_value)

    @staticmethod
    def _describe_error(err):
        "Return a readable one-line description of an IOError/OSError"
        filename = getattr(err, 'filename', None)
        strerror = getattr(err, 'strerror', None)
        if filename is not None and strerror is not None:
            return "%s: %s" % (filename, strerror)
        # e.g. URL errors, which carry a reason but no filename/strerror
        return str(err)

    def parse_args(self, cmd_args):
        """Parse values from the command line.

        Sets the option attributes, self.max_table_age, self.files_to_check
        and the table paths.  Exits with status 1 on a usage error, and with
        status 0 after printing help for -h/--help.
        """
        try:
            (opts, args) = getopt.getopt(
                cmd_args,
                'a:s:u:v:t:d:h',
                ['area_types=', 'cf_standard_names=', 'udunits=',
                 'version=', 'max_age=', 'spool_dir=', 'verbose',
                 'update_only', 'help'])
        except getopt.GetoptError as err:
            # Print the specific problem last so it is not scrolled away
            # by the long usage text.
            self.usage()
            print("Error: %s" % err)
            sys.exit(1)

        update_only = False
        for arg, val in opts:
            if arg in ("-a", "--area_types"):
                self.area_types_url = val
            elif arg in ("-s", "--cf_standard_names"):
                self.standard_names_url = val
            elif arg in ("-u", "--udunits"):
                self.udunits_path = val
            elif arg in ("-v", "--version"):
                self.cf_version = val
            elif arg in ("-t", "--max_age"):
                try:
                    self.max_table_age = float(val)
                except ValueError:
                    self.usage()
                    print("Error: invalid value for %s: %s" % (arg, val))
                    sys.exit(1)
            elif arg in ("-d", "--spool_dir"):
                self.spool_dir = val
            elif arg in ("-h", "--help"):
                self.usage(show_checker_usage=True, status=0)
            elif arg == "--verbose":
                self.verbose = True
            elif arg == "--update_only":
                update_only = True
            else:
                print("unrecognised option %s (coding bug?)" % arg)

        if self.max_table_age is None:
            if update_only:
                # age 0 forces a refresh regardless of timestamps
                self.max_table_age = 0
            else:
                self.max_table_age = self.default_max_table_age

        self.files_to_check = args
        if not update_only and not self.files_to_check:
            print("Error: no files to check and --update_only not given; "
                  "run with --help (or -h) for a full usage message")
            sys.exit(1)

        self.set_table_paths()

    def set_values_from_env(self):
        "Override table/udunits locations from the environment, where set"
        for env_key, self_key in ((self.standard_names_key, 'standard_names_url'),
                                  (self.area_types_key, 'area_types_url'),
                                  (self.udunits_key, 'udunits_path')):
            val = os.environ.get(env_key)
            if val is not None:
                setattr(self, self_key, val)

    def set_table_paths(self):
        "Set up the paths of the tables that are passed to the checker"
        self.standard_names_path = self._table_path(self.standard_names_url)
        self.area_types_path = self._table_path(self.area_types_url)

    def _table_path(self, url):
        "Return what to hand to the checker for a table location"
        if self.is_remote(url):
            return self.get_local_path(url)
        # Local paths / file: URLs are never downloaded into the spool
        # directory (see refresh_tables), so use the location as given.
        return url

    def get_local_path(self, url):
        "Return path of locally downloaded copy of a URL"
        return os.path.join(self.spool_dir, os.path.basename(url))

    def usage(self, status=None, show_checker_usage=False):
        "Print a usage message, and exit if status passed"
        print(self.__doc__)
        if show_checker_usage:
            print("\nUsage message from underlying checker (%s) follows:" % self.checker_command)
            print("-----------------------------------------------------\n")
            self.run_command([self.checker_command, "-h"])
        if status is not None:
            sys.exit(status)

    def refresh_tables(self):
        """Download the standard names and area type tables as required.

        Only remote URLs whose spool copy is missing or too old are fetched.
        Download and file-system errors propagate to the caller.
        """
        for url, local_path in ((self.standard_names_url, self.standard_names_path),
                                (self.area_types_url, self.area_types_path)):
            if self.is_remote(url) and not self.recent_copy_exists(local_path):
                self.fetch(url, local_path)

    def is_remote(self, url):
        "Return True unless url is a file: URL (or just a local path)"
        return not (url.startswith(("file:", "/")) or ":" not in url)

    def recent_copy_exists(self, local_path):
        "return True if the path exists and has age less than max allowed age"
        if not os.path.exists(local_path):
            return False
        age = self.file_age(local_path)
        if age is None:
            # could not stat the file (e.g. removed since the exists() check)
            return False
        if self.verbose:
            print("age of %s is %s days" % (local_path, age))
        # a future mtime (negative age) does *not* constitute a recent copy -
        # it means something has gone wrong, so we say no
        return 0 <= age <= self.max_table_age

    def file_age(self, path):
        "return age of file in days (based on mtime), or None if it cannot be read"
        try:
            return (time.time() - os.stat(path).st_mtime) / 86400.
        except OSError:
            return None

    def fetch(self, url, local_path, tmp_path=None):
        """Fetch URL to the specified local path (via tmp file for atomicity).

        tmp_path defaults to local_path + ".tmp".  If the download fails the
        temporary file is removed and the error propagates.
        """
        tmp_path = tmp_path or ("%s.tmp" % local_path)
        if self.verbose:
            print("downloading %s to %s" % (url, tmp_path))
        completed = False
        try:
            self.download(url, tmp_path)
            completed = True
        finally:
            if not completed:
                # do not leave a partial download lying around
                self._discard(tmp_path)
        if self.verbose:
            print("renaming %s to %s" % (tmp_path, local_path))
        os.rename(tmp_path, local_path)

    @staticmethod
    def _discard(path):
        "Best-effort removal of a file; a missing file is not an error"
        try:
            os.remove(path)
        except OSError:
            pass

    def download(self, url, local_path):
        "Raw download of URL to the specified local path and make world readable"
        fin = urllib2.urlopen(url)
        try:
            # binary mode: the response is bytes and must be stored unmodified
            with open(local_path, "wb") as fout:
                while True:
                    data = fin.read(65536)
                    if not data:
                        break
                    fout.write(data)
        finally:
            fin.close()
        os.chmod(local_path, 0o644)

    def run_checker(self, data_file, cf_version=None):
        """Run the CF checker on specified file against specified CF version.

        Returns the exit status reported by run_command().
        """
        command = [self.checker_command,
                   "-s", self.standard_names_path,
                   "-a", self.area_types_path,
                   "-u", self.udunits_path]
        if cf_version:
            command += ["-v", cf_version]
        command.append(data_file)
        if self.verbose:
            print("running command: %s" % " ".join(command))
        status = self.run_command(command)
        if self.verbose:
            print("checker returned status %s" % status)
        return status

    def run_command(self, command):
        """run command, return status

        Returns 127 if the command could not be started at all (for example
        because the executable does not exist).
        """
        # for clarity, flush before running command
        sys.stdout.flush()
        sys.stderr.flush()
        try:
            return subprocess.call(command)
        except OSError as err:
            print("Error: could not run %s: %s" % (command[0], err))
            return 127


if __name__ == '__main__':
    # Script entry point: build a wrapper with the default settings and let
    # it parse sys.argv, refresh the tables and run the checker.
    CfchecksWrapper().main()
