#!/usr/bin/env python
## emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
# ex: set sts=4 ts=4 sw=4 et:
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the datalad package for the
#   copyright and license terms.
#
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""
"""

import sys
import os
from pathlib import Path
import subprocess
from urllib.parse import urlparse
import json
from shutil import rmtree




class RCloneRemote(object):
    """Git remote helper that keeps a repository on an rclone remote.

    The directory addressed by the URL holds two files at the remote end:
    ``repo.7z``, a 7z archive of a bare Git repository, and ``refs``, a
    plain-text report of the refs in that repository. Locally, an extracted
    mirror of the archive is maintained underneath
    ``<gitdir>/rclone/<remote>/repo``.

    Parameters
    ----------
    gitdir : str or Path
      Git directory of the local repository. All helper state is placed in
      an ``rclone/<remote>`` subdirectory of it.
    remote : str
      Name of the Git remote. Used for the state directory and for the
      private ``refs/rclone/<remote>/`` namespace of the refspec.
    url : str
      URL of the remote. The network location is used as the name of the
      rclone remote, the path as the directory on that remote.
    instream : file-like
      Stream to read protocol commands from.
    outstream : file-like
      Stream to write protocol replies to.
    errstream : file-like
      Stream to write log messages to.
    """

    def __init__(self,
                 gitdir,
                 remote,
                 url,
                 instream=sys.stdin,
                 outstream=sys.stdout,
                 errstream=sys.stderr):
        self.parsed_url = urlparse(url)
        self.remote = remote
        # internal logic relies on workdir to be an absolute path
        self.workdir = Path(gitdir, 'rclone', remote).resolve()
        self.repodir = self.workdir / 'repo'
        self.marks_git = self.workdir / "git.marks"
        self.marks_rclone = self.workdir / "rclone.marks"
        self.refspec = "refs/heads/*:refs/rclone/{}/*".format(remote)
        self.instream = instream
        self.outstream = outstream
        self.errstream = errstream

        # TODO delay
        self.workdir.mkdir(parents=True, exist_ok=True)
        self.marks_git.touch()
        self.marks_rclone.touch()

    def log(self, *args):
        """Write a message to the error stream"""
        print(*args, file=self.errstream)

    def send(self, msg):
        """Write `msg` verbatim to the output stream and flush it"""
        print(msg, end='', file=self.outstream, flush=True)

    def _remote_location(self, name=None):
        """Return the rclone address of the remote directory, or of the file
        `name` inside it."""
        url = self.parsed_url
        location = '{}:{}'.format(url.netloc, url.path)
        if name is None:
            return location
        return '{}/{}'.format(location, name)

    def _mirror_env(self):
        """Return a copy of the environment with GIT_DIR set to the mirror"""
        env = os.environ.copy()
        env['GIT_DIR'] = str(self.repodir)
        return env

    @staticmethod
    def _list_refs(env):
        """Return one ' <refname> <objectname> ' line per ref of the
        repository selected by `env`."""
        return subprocess.run([
            'git', 'for-each-ref', "--format= %(refname) %(objectname) "],
            env=env,
            check=True,
            stdout=subprocess.PIPE,
            universal_newlines=True,
        ).stdout

    @staticmethod
    def _changed_refs(before, after):
        """Return names of refs that are new or changed in `after`

        Both arguments are ref reports with one ``<refname> <objectname>``
        line per ref. A ref counts as changed when its exact line is not
        present in `before`. Blank lines are ignored.
        """
        known = set(before.splitlines())
        return [
            line.split()[0]
            for line in after.splitlines()
            if line.strip() and line not in known
        ]

    def _report_ok(self, refs):
        """Send an 'ok <ref>' status line for each ref to the output stream"""
        for ref in refs:
            self.send('ok {}\n'.format(ref))

    def get_remote_refs(self):
        """Report remote refs

        These are kept in a dedicated "refs" file at the remote.

        Returns
        -------
        str
          Content of the "refs" file, or an empty string if it could not
          be read or is empty.
        """
        cat = subprocess.run([
            'rclone', 'cat', self._remote_location('refs')],
            stdout=subprocess.PIPE,
            # capture errors, not having refs is a normal thing
            stderr=subprocess.PIPE,
            universal_newlines=True,
        )
        if cat.returncode:
            # listing failed, nothing there
            return ''
        return cat.stdout if cat.stdout else ''

    def get_remote_state(self):
        """Return a dict with hashes for the remote repo archive or None

        None is returned when the remote directory does not exist, or when
        the listing has no hash record for ``repo.7z``.

        Raises
        ------
        RuntimeError
          If no connection to the rclone remote could be made, or an unknown
          error occurred.
        """
        # request listing of the remote dir with the repo archive
        lsjson = subprocess.run([
            'rclone', 'lsjson', self._remote_location(),
            '--files-only', '--no-modtime', '--hash'],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            # decode output, so that the error message below carries text
            # rather than the repr of a bytes object
            universal_newlines=True,
        )
        if lsjson.returncode == 3:
            # ls didn't find a repo at the remote end, but could talk to
            # the rclone remote itself -> nothing there
            return None
        elif lsjson.returncode:
            raise RuntimeError(
                'Accesing rclone remote failed: {}'.format(lsjson.stderr))
        ls = {i['Path']: i for i in json.loads(lsjson.stdout)}
        return ls.get('repo.7z', {}).get('Hashes', None)

    def mirror_repo_if_needed(self):
        """Ensure a local Git repo mirror of the one archived at the remote.

        The archive is only downloaded and extracted when the hashes that
        rclone reports for it match none of those recorded in the ``synced``
        stamp file of the last synchronization. Nothing is done when there
        is no archive at the remote.
        """
        # TODO acquire and release lock
        # stamp file with last synchronization IDs
        synced = self.workdir / 'synced'
        repo_hashes = None
        if synced.exists():
            repo_hashes = self.get_remote_state()
            if repo_hashes is None:
                # we had it sync'ed before, but now it is gone from the
                # remote -- we have all that is left locally
                synced.unlink()
                # sync stamp removed, but leave any local mirror untouched
                # it may be the only copy left
                return
            # compare states, try to be robust and take any hash match
            # unclear when which hash is available, but any should be good
            last_hashes = json.loads(synced.read_text())
            if any(repo_hashes.get(k, None) == v
                   for k, v in last_hashes.items()):
                # local sync matches remote situation
                return
        if repo_hashes is None:
            # in case we never sync'ed, obtain the ID info prior download
            # so we have it, whenever the download succeeded
            repo_hashes = self.get_remote_state()
        if repo_hashes is None:
            # there is nothing at the remote end
            return
        sync_dir = self.workdir / 'sync'
        self.log('Downloading repository archive')
        subprocess.run([
            'rclone', 'copy', self._remote_location('repo.7z'),
            str(sync_dir)],
            check=True,
        )
        repo_archive = sync_dir / 'repo.7z'
        self.log('Extracting repository archive')
        if self.repodir.exists():
            # if we extract, we cannot tolerate left-overs
            rmtree(str(self.repodir), ignore_errors=True)
        subprocess.run([
            '7z', 'x', str(repo_archive)],
            cwd=str(self.workdir),
            stdout=subprocess.PIPE,
            check=True,
        )
        rmtree(str(sync_dir), ignore_errors=True)
        # update sync stamp only after everything else was successful
        synced.write_text(json.dumps(repo_hashes))

    def import_refs_from_mirror(self, refs):
        """Uses fast-export to pull refs from the local repository mirror

        The mirror must exist, when this function is called. The
        fast-export stream is written to the standard output inherited by
        the ``git`` child process, not to the configured output stream.

        Parameters
        ----------
        refs : list of str
          Names of the refs to export.

        Raises
        ------
        RuntimeError
          If there is no local repository mirror.
        """
        if not self.repodir.exists():
            # this should not happen. If we get here, it means that Git
            # was promised some refs to be available, but the mirror
            # to pull them from did not materialize. Crash at this point,
            # any recovery from such a situation should have happened
            # before
            raise RuntimeError(
                'rclone repository mirror not found')
        subprocess.run([
            'git', 'fast-export',
            '--import-marks={}'.format(str(self.marks_rclone)),
            '--export-marks={}'.format(str(self.marks_rclone)),
            '--refspec', self.refspec] + list(refs),
            env=self._mirror_env(),
            check=True,
        )

    def format_refs_in_mirror(self):
        """Format a report on refs in the mirror like LIST wants it

        If the mirror is empty, the report will be empty.

        Returns
        -------
        str
          One ``<objectname> <refname>`` line per ref, followed by an
          ``@<ref> HEAD`` line naming the ref HEAD points to.
        """
        refs = ''
        if not self.repodir.exists():
            return refs
        env = self._mirror_env()
        refs += subprocess.run([
            'git', 'for-each-ref', "--format=%(objectname) %(refname)"],
            env=env,
            check=True,
            stdout=subprocess.PIPE,
            universal_newlines=True,
        ).stdout
        HEAD_ref = subprocess.run([
            'git', 'symbolic-ref', 'HEAD'],
            env=env,
            check=True,
            stdout=subprocess.PIPE,
            universal_newlines=True,
        ).stdout
        refs += '@{} HEAD\n'.format(HEAD_ref.strip())
        return refs

    def export_to_rclone(self):
        """Export a fast-export stream to rclone.

        The stream is fast-import'ed into a local repository mirror first.
        If no mirror repository exists, an empty one is created. The mirror
        is then 7z'ed and uploaded. After a successful upload an
        ``ok <ref>`` line is sent to the output stream for every updated
        ref. If the upload fails, the updated refs are recorded in an
        ``upload_failed`` marker file and nothing is reported; the recorded
        refs are picked up again by the next export.

        ``git fast-import`` reads the stream from the standard input
        inherited by the child process, not from the configured input
        stream.
        """
        # TODO acquire and release lock
        env = self._mirror_env()
        if not self.repodir.exists():
            # ensure we have a repo
            self.repodir.mkdir()
            subprocess.run([
                'git', 'init', '--bare', '--quiet'],
                env=env,
                check=True,
            )
        # which refs did we have in the mirror before the import?
        before = self._list_refs(env)
        # perform actual import
        subprocess.run([
            'git', 'fast-import', '--quiet',
            '--import-marks={}'.format(str(self.marks_rclone)),
            '--export-marks={}'.format(str(self.marks_rclone))],
            env=env,
            check=True,
        )
        # which refs do we have now?
        after = self._list_refs(env)
        # figure out if anything happened
        upload_failed_marker = (self.workdir / 'upload_failed')
        if upload_failed_marker.exists():
            # we have some unsync'ed data from a previous attempt
            updated_refs = json.loads(upload_failed_marker.read_text())
            need_sync = True
            upload_failed_marker.unlink()
        else:
            updated_refs = []
            need_sync = False
        for ref in self._changed_refs(before, after):
            # a ref from a previously failed upload may have changed again,
            # report it only once
            if ref not in updated_refs:
                updated_refs.append(ref)
            need_sync = True

        # TODO acknowledge a failed upload
        if not need_sync:
            return
        subprocess.run([
            'git', 'gc'],
            env=env,
            # who knows why this would fail, but it would not be the end
            # of the world
            check=False,
        )
        # prepare upload pack
        sync_dir = self.workdir / 'sync'
        if not sync_dir.exists():
            sync_dir.mkdir()
        subprocess.run([
            '7z', 'u', str(sync_dir / 'repo.7z'), 'repo'],
            cwd=str(self.workdir),
            stdout=subprocess.PIPE,
            check=True,
        )
        # dump refs for a later LIST of the remote
        (sync_dir / 'refs').write_text(
            self.format_refs_in_mirror())

        self.log('Upload repository archive')
        sync_repo = subprocess.run([
            'rclone', 'sync', str(sync_dir), self._remote_location()],
        )
        if sync_repo.returncode:
            # TODO we could retry...
            # make a record which refs failed to update/upload
            upload_failed_marker.write_text(
                json.dumps(updated_refs))
            # to not report refs as successfully updated
            return
        # we no longer need the repo archive, we keep the actual
        # repo mirror
        rmtree(str(sync_dir), ignore_errors=True)

        # upload was successful, so we can report that; this must go to
        # the configured output stream (flushed), like all other replies
        self._report_ok(updated_refs)

        # lastly update the sync stamp to avoid redownload of what was
        # just uploaded
        synced = self.workdir / 'synced'
        repo_hashes = self.get_remote_state()
        if repo_hashes is None:
            self.log('Failed to update sync stamp after successful upload')
        else:
            synced.write_text(json.dumps(repo_hashes))

    def communicate(self):
        """Implement the necessary pieces of the git-remote-helper protocol

        Uses the input, output and error streams configured for the
        class instance. Returns when an empty line or an unknown command
        is received, or when the input stream is exhausted.
        """
        # NOTE(review): the import/export commands hand the inherited
        # stdin/stdout to git child processes while this loop reads from
        # `instream`; presumably this relies on the reader not having
        # buffered any of the data meant for the child -- confirm
        for line in self.instream:
            if line == '\n':
                # orderly exit command
                return
            elif line == 'capabilities\n':
                self.send(
                    'import\n'
                    'export\n'
                    'refspec {refspec}\n'
                    '*import-marks {marks}\n'
                    '*export-marks {marks}\n'
                    'signed-tags\n'
                    '\n'.format(
                        refspec=self.refspec,
                        marks=str(self.marks_git))
                )
            elif line in ('list\n', 'list for-push\n'):
                # 'list for-push' is answered exactly like 'list'
                self.send('{}\n'.format(self.get_remote_refs()))
            elif line.startswith('import '):
                # data is being imported from rclone
                self.mirror_repo_if_needed()
                refs = [line[7:].strip()]
                # import commands come as a batch, collect all of them;
                # the first other line (or EOF) ends the batch
                while True:
                    line = self.instream.readline()
                    if not line.startswith('import '):
                        break
                    refs.append(line[7:].strip())
                self.send(
                    'feature import-marks={marks}\n'
                    'feature export-marks={marks}\n'
                    'feature done\n'.format(
                        marks=str(self.marks_git))
                )
                self.import_refs_from_mirror(refs)
                self.send('done\n')
            elif line == 'export\n':
                # data is being exported to rclone
                self.mirror_repo_if_needed()
                self.export_to_rclone()
                # blank line terminates the list of status reports
                self.send(
                    '\n'
                )
            else:
                self.log('UNKNOWN COMMAND', line)
                # unrecoverable error
                return


def main():
    """Run the remote helper for the remote given on the command line.

    The first two command line arguments are the remote name and its URL.
    The location of the Git directory is taken from the ``GIT_DIR``
    environment variable.

    Raises
    ------
    ValueError
      If fewer than two command line arguments were given.
    KeyError
      If ``GIT_DIR`` is not set in the environment.
    """
    argv = sys.argv
    if not len(argv) >= 3:
        raise ValueError("Usage: git-remote-rclone REMOTE-NAME URL")

    remote_name = argv[1]
    remote_url = argv[2]
    # GIT_DIR is mandatory, there is deliberately no fallback
    git_dir = os.environ['GIT_DIR']

    helper = RCloneRemote(git_dir, remote_name, remote_url)
    helper.communicate()


# run the remote helper only when executed as a script, not when imported
if __name__ == '__main__':
    main()
