#!/usr/bin/python3
# -*- coding: utf-8 -*-

# scm (only git atm) cloning and packaging for Open Build Service
# 
# (C) 2021 by Adrian Schröter <adrian@suse.de>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# See http://www.gnu.org/licenses/gpl-2.0.html for full license text.

import argparse
import os
import re
import shutil
import sys
import logging
import subprocess
import tempfile
from html import escape
from typing import Dict, List, Optional, Set, TextIO, Tuple, Union
import urllib.parse
import configparser

# Helper executables this script shells out to.
download_assets = '/usr/lib/build/download_assets'
export_debian_orig_from_git = '/usr/lib/build/export_debian_orig_from_git'

# Mode switches; the defaults below are adjusted from the environment.
#   pack_directories: archive top level directories as .obscpio files and
#                     write the service info file
#   get_assets:       run the asset download and debian origtar export
#   shallow_clone:    limit git clone/fetch to a depth of 1
pack_directories = False
get_assets = False
shallow_clone = True

if os.environ.get('DEBUG_SCM_BRIDGE') == "1":
    logging.getLogger().setLevel(logging.DEBUG)
if os.environ.get('OBS_SERVICE_DAEMON'):
    pack_directories = get_assets = True
if os.environ.get('OSC_VERSION'):
    get_assets, shallow_clone = True, False
# all child processes inherit the C locale
os.environ['LANG'] = "C"

class ObsGit(object):
    """Clone a git repository described by a URL and post-process the result.

    The URL's query parameters and fragment select sub directory, revision
    and further options (see ``__init__``).  The remaining methods either
    prepare a single package checkout (assets, debian files, cpio archives)
    or, in project mode, turn the top level entries of the checkout into
    per-package ``.xml``/``.info``/``.link`` files.
    """

    # Directory names accepted by process_package_subdirectory(): ASCII
    # letters, digits, '.', '-', '_' and '+' only (the empty string matches
    # as well).
    _REGEXP = re.compile(r"^[a-zA-Z0-9\.\-\_\+]*$");

    def __init__(self, outdir: str, url: str, projectscmsync: Optional[str]) -> None:
        """Parse *url* and initialise the state for one bridge run.

        The query parameters ``subdir``, ``arch``, ``enforce_bcntsynctag``,
        ``keepmeta``, ``lfs`` and ``onlybuild`` are consumed and removed from
        the stored URL; every other parameter is kept.  A URL fragment is
        taken as the revision to check out.  ``self.scmtoolurl`` is the
        resulting URL with a leading ``git+`` scheme prefix stripped.

        Args:
            outdir: directory that receives the resulting files.
            url: repository URL including the optional parameters above.
            projectscmsync: reference to the project git, recorded in the
                ``_scmsync.obsinfo`` file when set.
        """
        # the keepmeta handling below has to change the module level switch
        global shallow_clone

        self.outdir   = outdir
        self.revision = None
        self.subdir   = None
        self.projectscmsync = projectscmsync
        self.keep_meta = False
        self.enforced_deep_clone = False
        self.arch = []
        self.onlybuild = None
        self.url = list(urllib.parse.urlparse(url))
        self.no_lfs = False
        self.enforce_bcntsynctag = False
        # for project level mode
        self.export_files = set(["_config"])
        self.gitsubmodules: Set[str] = set()
        self.revisions: Optional[Dict[str, str]] = None

        query = urllib.parse.parse_qs(self.url[4])
        parameter_count = len(query)
        if "subdir" in query:
            self.subdir = query.pop('subdir')[0]
        if "arch" in query:
            self.arch = query.pop('arch')
        if "enforce_bcntsynctag" in query:
            del query['enforce_bcntsynctag']
            self.enforce_bcntsynctag = True
        if "keepmeta" in query:
            del query['keepmeta']
            self.enforced_deep_clone = True
            self.keep_meta = True
        if "lfs" in query:
            self.no_lfs = query.pop("lfs") == ["0"]
        if "onlybuild" in query:
            self.onlybuild = dict.fromkeys(query.pop('onlybuild'), 1)
        if len(query) != parameter_count:
            # Only rewrite the query string when something was consumed.
            # parse_qs() returns lists of values, so doseq=True is required
            # to write the remaining parameters back as key=value pairs
            # instead of the repr() of a list.
            self.url[4] = urllib.parse.urlencode(query, doseq=True)

        if self.url[5]:
            self.revision = self.url[5]
            self.url[5] = ''
        # if we keep the meta files we always want a deep clone
        if self.keep_meta:
            shallow_clone = False
        # scmtoolurl is the url we pass to the scm tool
        scmtoolurl = self.url.copy()
        if scmtoolurl[0] and scmtoolurl[0].startswith('git+'):
            scmtoolurl[0] = scmtoolurl[0][4:]
        self.scmtoolurl = urllib.parse.urlunparse(scmtoolurl)

    def run_cmd_nonfatal(
            self,
            cmd: List[str],
            *,
            cwd: Optional[str]=None,
            stdout: Union[int, TextIO]=subprocess.PIPE,
            env: Optional[Dict[str, str]]=None,
    ) -> Tuple[int, str]:
        logging.debug("COMMAND: %s" % cmd)
        stderr = subprocess.PIPE
        if stdout == subprocess.PIPE:
            stderr = subprocess.STDOUT
        proc = subprocess.Popen(cmd,
                                shell=False,
                                stdout=stdout,
                                stderr=stderr,
                                cwd=cwd,
                                env=env)
        std_out = proc.communicate()[0]
        output = std_out.decode() if std_out else ''

        logging.debug("RESULT(%d): %s", proc.returncode, repr(output))
        return (proc.returncode, output)

    def run_cmd(
            self,
            cmd: List[str],
            *,
            fatal: str,
            cwd: Optional[str]=None,
            stdout: Union[int, TextIO]=subprocess.PIPE,
            env: Optional[Dict[str, str]]=None,
    ) -> str:
        returncode, output = self.run_cmd_nonfatal(cmd, cwd=cwd, stdout=stdout, env=env)
        if returncode != 0:
            print("ERROR: " + fatal + " failed: ", output)
            sys.exit(returncode)
        return output

    def do_clone_commit(self, outdir: str, include_submodules: bool=False) -> None:
        """Check out the commit id in ``self.revision`` into *outdir*.

        This initialises an empty repository, fetches just the requested
        commit from ``self.scmtoolurl`` and checks it out.  The fetch is
        limited to depth 1 unless shallow clones are disabled globally or a
        deep clone is enforced for this instance.

        Args:
            outdir: directory to create the repository in.
            include_submodules: also fetch, init and update submodules.
        """
        assert self.revision, "no revision is set but do_clone_commit was called"
        # a 64 character object name selects the sha256 object format
        if len(self.revision) == 64:
            objectformat = '--object-format=sha256'
        else:
            objectformat = '--object-format=sha1'
        self.run_cmd([ 'git', 'init', objectformat, outdir ], fatal="git init")
        self.run_cmd([ 'git', '-C', outdir, 'remote', 'add', 'origin', self.scmtoolurl ],
                     fatal="git remote add origin")

        cmd = [ 'git', '-C', outdir, 'fetch', 'origin', self.revision ]
        if shallow_clone and not self.enforced_deep_clone:
            cmd += [ '--depth', '1' ]
        if include_submodules:
            # try to select specific submodule first when a subdir is given and fall back
            if self.subdir is None or self.run_cmd_nonfatal(cmd + [ "--recurse-submodules=" + self.subdir ])[0] != 0:
                cmd += [ '--recurse-submodules' ]
                self.run_cmd(cmd, fatal="git fetch")
        else:
            self.run_cmd(cmd, fatal="git fetch")

        # with lfs=0 the large files must not be downloaded on checkout
        lfs_env = {"GIT_LFS_SKIP_SMUDGE": "1", **os.environ} if self.no_lfs else None
        self.run_cmd([ 'git', '-C', outdir, 'checkout', '-q', self.revision ],
                     fatal="git checkout", env=lfs_env)
        if include_submodules:
            self.run_cmd([ 'git', '-C', outdir, 'submodule', 'init' ],
                         fatal="git submodule init")
            # same environment as the checkout, so lfs=0 covers submodules
            # here like it does for a plain "git clone" in do_clone()
            self.run_cmd([ 'git', '-C', outdir, 'submodule', 'update' ],
                         fatal="git submodule update", env=lfs_env)

    def do_clone(self, outdir: str, include_submodules: bool=False) -> None:
        """Clone ``self.scmtoolurl`` into *outdir*.

        A revision that looks like a full commit id (40 or more hex digits)
        is delegated to ``do_clone_commit``; any other revision is passed to
        ``git clone -b``.  The clone is limited to depth 1 unless shallow
        clones are disabled globally or a deep clone is enforced for this
        instance.

        Args:
            outdir: target directory of the clone.
            include_submodules: also clone submodules (restricted to
                ``self.subdir`` when that is set).
        """
        if self.revision and re.match(r"^[0-9a-fA-F]{40,}$", self.revision):
            self.do_clone_commit(outdir, include_submodules=include_submodules)
            return

        cmd = [ 'git', 'clone' ]
        if self.revision:
            cmd += [ '-b', self.revision ]
        cmd += [ self.scmtoolurl, outdir ]
        if include_submodules:
            if self.subdir:
                cmd += [ "--recurse-submodules=" + self.subdir ]
            else:
                cmd += [ '--recurse-submodules' ]
        if shallow_clone and not self.enforced_deep_clone:
            cmd += [ '--depth', '1' ]
        # with lfs=0 the large files must not be downloaded on checkout
        env = {"GIT_LFS_SKIP_SMUDGE": "1", **os.environ} if self.no_lfs else None
        self.run_cmd(cmd, fatal="git clone", env=env)

    # the _scmsync.obsinfo file might become obsolete again when we store entire
    # git history by default later.
    def write_obsinfo(self, outdir: str) -> None:
        cmd = [ 'git', 'rev-parse', 'HEAD' ]
        line = self.run_cmd(cmd, cwd=outdir, fatal="git rev-parse")
        commit = line.rstrip()
        cmd = [ 'git', 'log', '-n1', '--date=format:%Y%m%d', '--no-show-signature', '--pretty=format:%ct' ]
        line = self.run_cmd(cmd, cwd=outdir, fatal="git rev-parse")
        tstamp = line.rstrip()
        infofile = os.path.join(outdir, '_scmsync.obsinfo')
        with open(infofile, "w") as obsinfo:
            obsinfo.write("mtime: " + tstamp + "\n")
            obsinfo.write("commit: " + commit + "\n")
            if self.scmtoolurl:
                obsinfo.write("url: " + self.scmtoolurl + "\n")
            if self.revision:
                obsinfo.write("revision: " + self.revision + "\n")
            if self.subdir:
                obsinfo.write("subdir: " + self.subdir + "\n")
            if self.projectscmsync:
                obsinfo.write("projectscmsync: " + self.projectscmsync + "\n")

    def clone(self, include_submodules: bool=False) -> None:
        if not self.subdir:
            self.do_clone(self.outdir, include_submodules=include_submodules)
            self.write_obsinfo(self.outdir)
            return
        clonedir = tempfile.mkdtemp(prefix="obs-scm-bridge")
        self.do_clone(clonedir, include_submodules=include_submodules)
        fromdir = os.path.join(clonedir, self.subdir)
        if os.path.islink(fromdir):
            target = os.readlink(fromdir).rstrip('/') # this is no recursive lookup, but is there a usecase?
            if '/' in target:
                print("ERROR: only local links are supported: " + self.subdir)
                sys.exit(1)
            # switch subdir and clone again
            self.subdir=target
            shutil.rmtree(clonedir)
            clonedir = tempfile.mkdtemp(prefix="obs-scm-bridge")
            fromdir = os.path.join(clonedir, self.subdir)
            self.do_clone(clonedir, include_submodules=include_submodules)
            self.write_obsinfo(clonedir)

        if not os.path.realpath(fromdir+'/').startswith(os.path.realpath(clonedir+'/')):
            print("ERROR: subdir is not below clone directory")
            sys.exit(1)
        if not os.path.isdir(fromdir):
            print("ERROR: subdir " + self.subdir + " does not exist")
            sys.exit(1)
        if not os.path.isdir(self.outdir):
            os.makedirs(self.outdir)
        for name in os.listdir(fromdir):
            shutil.move(os.path.join(fromdir, name), self.outdir)
        shutil.rmtree(clonedir)

    def fetch_tags(self) -> None:
        """Fetch all tags and all branch heads of ``origin`` into ``self.outdir``."""
        logging.info("fetching all tags")
        branch_refspec = '+refs/heads/*:refs/remotes/origin/*'
        self.run_cmd(
            [ 'git', '-C', self.outdir, 'fetch', '--tags', 'origin', branch_refspec ],
            fatal="fetch --tags",
        )

    def cpio_directory(self, directory: str) -> None:
        logging.info("create archivefile for %s", directory)
        cmd = [ download_assets, '--create-cpio', '--', directory ]
        with open(directory + '.obscpio', 'w') as archivefile:
            self.run_cmd(cmd, stdout=archivefile, fatal="cpio creation")

    def cpio_specials(self, specials: List[str]) -> None:
        if not specials:
            return
        logging.info("create archivefile for specials")
        cmd = [ download_assets, '--create-cpio', '--', '.' ] + specials
        with open('build.specials.obscpio', 'w') as archivefile:
            self.run_cmd(cmd, stdout=archivefile, fatal="cpio creation")

    def cpio_directories(self) -> None:
        logging.debug("walk via %s", self.outdir)
        os.chdir(self.outdir)
        listing = sorted(os.listdir("."))
        specials = []
        for name in listing:
            if name in ('.git', '.gitattributes') and not self.keep_meta:
                # we do not store git meta data by default to avoid bloat storage
                continue
            if name[0:1] == '.':
                specials.append(name)
                continue
            if os.path.islink(name):
                specials.append(name)
                continue
            if os.path.isdir(name):
                logging.info("CPIO %s ", name)
                self.cpio_directory(name)
                shutil.rmtree(name)
        if specials:
            self.cpio_specials(specials)
            for name in specials:
                if os.path.isdir(name):
                    shutil.rmtree(name)
                else:
                    os.unlink(name)

    def get_assets(self) -> None:
        """Run the ``download_assets`` helper on ``self.outdir``.

        Each configured architecture is passed via ``--arch``.  ``--unpack``
        is only requested when directories are not packed into archives.
        """
        logging.info("downloading assets")
        arch_options = [ option for arch in self.arch for option in ('--arch', arch) ]
        unpack_option = [] if pack_directories else [ '--unpack' ]
        cmd = [ download_assets ] + arch_options + unpack_option
        cmd += [ '--noassetdir', '--', self.outdir ]
        self.run_cmd(cmd, fatal="asset download")

    def copyfile(self, src: str, dst: str) -> None:
        shutil.copy2(os.path.join(self.outdir, src), os.path.join(self.outdir, dst))

    def export_debian_files(self) -> None:
        if os.path.isfile(self.outdir + "/debian/control") and \
                not os.path.isfile(self.outdir + "/debian.control"):
            self.copyfile("debian/control", "debian.control")
        if os.path.isfile(self.outdir + "/debian/changelog") and \
                not os.path.isfile(self.outdir + "/debian.changelog"):
            self.copyfile("debian/changelog", "debian.changelog")

    def get_debian_origtar(self) -> None:
        if os.path.isfile(self.outdir + "/debian/control"):
            # need up get all tags 
            if not self.subdir:
                self.fetch_tags()
            cmd = [ export_debian_orig_from_git, self.outdir ]
            logging.info("exporting debian origtar")
            self.run_cmd(cmd, fatal="debian origtar export")

    def get_subdir_info(self, dir: str) -> str:
        """Return the stripped output of ``download_assets --show-dir-srcmd5`` for *dir*."""
        output = self.run_cmd(
            [ download_assets, '--show-dir-srcmd5', '--', dir ],
            fatal="download_assets --show-dir-srcmd5",
        )
        return output.strip()

    def write_info_file(self, filename: str, info: str) -> None:
        with open(filename, 'w') as infofile:
            infofile.write(info + '\n')

    def add_service_info(self) -> None:
        info = None
        if self.subdir:
            info = self.get_subdir_info(self.outdir)
        else:
            cmd = [ 'git', '-C', self.outdir, 'show', '-s', '--pretty=format:%H', 'HEAD' ]
            info = self.run_cmd(cmd, fatal="git show -s HEAD")
            info = info.strip()
        if info:
            self.write_info_file(os.path.join(self.outdir, "_service_info"), info)

    def write_package_xml_file(self, name: str, url: str, projectscmsync: str=None) -> None:
        projecturlxml=''
        if self.onlybuild and not name in self.onlybuild.keys():
            return
        if projectscmsync:
            projecturlxml=f"""\n  <url>{escape(projectscmsync)}</url>"""
        with open(f"{name}.xml", 'w') as xmlfile:
            xmlfile.write(f"""<package name="{escape(name)}">""")
            if self.enforce_bcntsynctag:
                xmlfile.write(f"""<bcntsynctag>{escape(name)}</bcntsynctag>{projecturlxml}""")
            xmlfile.write(f"""<scmsync>{escape(url)}</scmsync>
</package>\n""")

    def write_package_xml_local_link(self, target: str, name: str) -> None:
        with open(f"{name}.xml", 'w') as xmlfile:
            xmlfile.write(f"""<package name="{escape(name)}">""")
            if self.enforce_bcntsynctag:
                xmlfile.write(f"""<bcntsynctag>{escape(name)}</bcntsynctag>{projecturlxml}""")
            xmlfile.write(f"""</package>""")
        self.export_files.add(name + ".xml")
        with open(f"{name}.link", 'w') as linkfile:
            linkfile.write(f"""<link package="{escape(target)}" cicount="copy" />""")
        self.export_files.add(name + ".link")

    def list_submodule_revisions(self) -> None:
        """Fill ``self.revisions`` with the commit ids of the top level submodules.

        Parses ``git ls-tree HEAD .`` run in the current working directory
        and records every entry of type ``commit`` as ``path -> commit id``.
        """
        self.revisions = {}
        output = self.run_cmd([ 'git', 'ls-tree', 'HEAD', '.' ], fatal="git ls-tree")
        for line in output.splitlines():
            # "<mode> <type> <object>\t<path>": split off the first three
            # fields only, so a path containing whitespace stays intact
            fields = line.split(maxsplit=3)
            if len(fields) < 4:
                continue
            objtype, objid, path = fields[1:]
            if objtype == 'commit' and len(objid) >= 40:
                self.revisions[path] = objid

    def process_package_submodule(self, gsmsection: configparser.SectionProxy, package_name: Optional[str]=None) -> None:
        path = gsmsection['path']
        urlstr = gsmsection['url']
        if not package_name:
            package_name = path

        if '/' in path:
            # we handle only top level submodules in project mode
            return

        # find revision of submodule
        if not self.revisions:
            self.list_submodule_revisions()
        assert self.revisions, "self.revisions must not be None after calling self.list_submodule_revisions"
        revision = self.revisions.get(path, None)
        if not revision:
            logging.error("Could not determine revision of submodule for " + path)
            sys.exit(1)

        # all good, write xml file and register the module
        self.gitsubmodules.add(path)
        url = list(urllib.parse.urlparse(urlstr))
        url[5] = revision
        if self.arch:
            query = urllib.parse.parse_qs(url[4]);
            query['arch'] = self.arch
            url[4] = urllib.parse.urlencode(query)

        # handle relative urls in submodules
        unparsed_url = urllib.parse.urlunparse(url)
        if ".." == unparsed_url[0:2]:
            # need to append a '/' to the base url so that the relative
            # path is properly resolved, otherwise we might descend one
            # directory too far
            unparsed_url = urllib.parse.urljoin(self.scmtoolurl+'/', unparsed_url)
        if self.url[0][0:4] == 'git+':
            unparsed_url = 'git+' + unparsed_url

        projecturl = self.url.copy()
        # replace the fragment with the checked out commit id
        cmd = [ 'git', 'rev-parse', 'HEAD' ]
        line = self.run_cmd(cmd, cwd=self.outdir, fatal="git rev-parse")
        projecturl[5] = line.rstrip()
        projectscmsync = urllib.parse.urlunparse(projecturl)

        self.write_package_xml_file(package_name, unparsed_url, projectscmsync)
        self.write_info_file(package_name + ".info", revision)
        self.export_files.add(package_name + ".xml")
        self.export_files.add(package_name + ".info")

    def process_package_subdirectory(self, directory: str) -> None:
        info = self.get_subdir_info(directory)
        shutil.rmtree(directory)
        if not self._REGEXP.match(directory):
            logging.warn("directory name contains invalid char: %s", directory)
            return

        # add subdir info file
        self.write_info_file(directory + ".info", info)

        # add subdir parameter to url
        url = self.url.copy()
        query = urllib.parse.parse_qs(url[4])
        query['subdir'] = directory
        url[4] = urllib.parse.urlencode(query)
        if self.revision:
            url[5] = self.revision

        self.write_package_xml_file(directory, urllib.parse.urlunparse(url))

    def generate_package_xml_files(self) -> None:
        logging.debug("walk via %s", self.outdir)
        os.chdir(self.outdir)

        # find all top level git submodules
        gsmconfig = None
        if os.path.isfile('.gitmodules'):
            gsmconfig = configparser.ConfigParser()
            gsmconfig.read('.gitmodules')
            for section in gsmconfig.sections():
                if not 'path' in gsmconfig[section]:
                    logging.warn("path not defined for git submodule " + section)
                    continue
                if not 'url' in gsmconfig[section]:
                    logging.warn("url not defined for git submodule " + section)
                    continue
                self.process_package_submodule(gsmconfig[section])

        # handle plain files and directories
        listing = sorted(os.listdir("."))
        for name in listing:
            if os.path.islink(name):
                target = os.readlink(name).rstrip('/') # this is no recursive lookup, but is there a usecase?
                if '/' in target:
                    logging.warn("only local links are supported, skipping: " + name)
                    continue
                if target in self.gitsubmodules or os.path.isdir(target):
                    self.write_package_xml_local_link(target, name)
                    os.unlink(name)
                else:
                    logging.debug("skipping symlink to a non git submodule %s -> %s", name, target)
                    os.unlink(name)

        # handle plain files and directories
        listing = sorted(os.listdir("."))
        for name in listing:
            if name == '.git' and not self.keep_meta:
                shutil.rmtree(name)
                continue
            if os.path.islink(name):
                continue
            elif os.path.isdir(name):
                if name in self.gitsubmodules:
                    # already handled as git submodule
                    shutil.rmtree(name)
                    continue
                self.process_package_subdirectory(name)
            else:
                if not name in self.export_files:
                    os.unlink(name)

if __name__ == '__main__':

    # All options take a value ("--name value").
    parser = argparse.ArgumentParser(
        description='Open Build Service source service for managing packaging files in git. '
        'This is a special service for OBS git integration.')
    parser.add_argument('--outdir', required=True,
                        help='output directory for modified sources',
                        nargs=1,
                        type=str)
    parser.add_argument('--url',
                        help='REQUIRED: url to git repository',
                        required=True,
                        nargs=1,
                        type=str)
    parser.add_argument('--projectmode',
                        help='just return the package list based on the subdirectories')
    parser.add_argument('--projectscmsync',
                        help='add also reference information of a project git for a package clone')
    parser.add_argument('--debug',
                        help='verbose debug mode')
    args = vars(parser.parse_args())

    # nargs=1 delivers one-element lists
    url = args['url'][0]
    outdir = args['outdir'][0]
    project_mode = args['projectmode']
    projectscmsync = args['projectscmsync']

    if args['debug']:
        logging.getLogger().setLevel(logging.DEBUG)
        logging.debug("Running in debug mode")

    # workflow
    obsgit = ObsGit(outdir, url, projectscmsync)
    if project_mode in ('true', '1'):
        # project mode: only generate the package list, then stop
        obsgit.clone()
        obsgit.generate_package_xml_files()
        sys.exit(0)

    # package mode: full checkout including submodules
    obsgit.clone(include_submodules=True)

    if pack_directories:
        obsgit.add_service_info()
    if get_assets:
        obsgit.get_assets()
        obsgit.get_debian_origtar()
    if pack_directories:
        obsgit.export_debian_files()
        obsgit.cpio_directories()

