#!/usr/bin/python3
# -*- coding: utf-8 -*-

# scm (only git atm) cloning and packaging for Open Build Service
# 
# (C) 2021 by Adrian Schröter <adrian@suse.de>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# See http://www.gnu.org/licenses/gpl-2.0.html for full license text.

import argparse
import os
import re
import shutil
import sys
import logging
import subprocess
import tempfile
import yaml
from html import escape
from typing import Dict, List, Optional, TextIO, Tuple, Union
import urllib.parse
import configparser

download_assets = '/usr/lib/build/download_assets'
critical_instances_config = "/etc/obs/services/scm-bridge/critical-instances"
credentials_config = "/etc/obs/services/scm-bridge/credentials"
export_debian_orig_from_git = '/usr/lib/build/export_debian_orig_from_git'
pack_directories = False
get_assets = False
shallow_clone = True
create_obsinfo = False
rewrite_url_to_ssh = False
write_obsinfo_with_subdir = True

if os.environ.get('DEBUG_SCM_BRIDGE') == "1":
    logging.getLogger().setLevel(logging.DEBUG)
if os.environ.get('OBS_SERVICE_DAEMON'):
    pack_directories = True
    get_assets = True
    create_obsinfo = True
if os.environ.get('OSC_VERSION'):
    get_assets = True
    shallow_clone = False
    create_obsinfo = False
    rewrite_url_to_ssh = True
os.environ['LANG'] = "C"

class ObsGit(object):

    _REGEXP = re.compile(r"^[a-zA-Z0-9\-\_\+][a-zA-Z0-9\.\-\_\+]*$");

    def __init__(self, outdir: str, url: str, projectscmsync: str) -> None:
        self.outdir   = outdir
        self.clonedir = None
        self.revision = None
        self.subdir   = None
        self.projectscmsync = projectscmsync
        self.keep_meta = False
        self.arch = []
        self.critical_git_servers = []
        self.onlybuild = None
        self.url = list(urllib.parse.urlparse(url))
        self.no_lfs = False
        self.enforce_bcntsynctag = False
        # for project level mode
        self.processed = {}
        self.git_store = None
        self.shallow_clone = shallow_clone
        self.create_obsinfo = create_obsinfo
        self.add_noobsinfo = False
        #: git submodule config, if present
        self.gsmconfig: Optional[configparser.ConfigParser] = None

        query = urllib.parse.parse_qs(self.url[4], keep_blank_values=True)
        if "subdir" in query:
            self.subdir = query['subdir'][0]
            del query['subdir']
            self.url[4] = urllib.parse.urlencode(query, doseq=True)
        if "arch" in query:
            self.arch = query['arch']
            del query['arch']
            self.url[4] = urllib.parse.urlencode(query, doseq=True)
        if "enforce_bcntsynctag" in query:
            self.enforce_bcntsynctag = True
            del query['enforce_bcntsynctag']
            self.url[4] = urllib.parse.urlencode(query, doseq=True)
        if "keepmeta" in query:
            self.keep_meta = True
            del query['keepmeta']
            self.url[4] = urllib.parse.urlencode(query, doseq=True)
        if "lfs" in query:
            self.no_lfs = query["lfs"] == ["0"]
            del query["lfs"]
            self.url[4] = urllib.parse.urlencode(query, doseq=True)
        if "onlybuild" in query:
            ob = query['onlybuild']
            self.onlybuild={ob[i]:1 for i in range(len(ob))}
            del query['onlybuild']
            self.url[4] = urllib.parse.urlencode(query, doseq=True)
        if "noobsinfo" in query:
            self.add_noobsinfo = True
            self.create_obsinfo = False
            del query['noobsinfo']
            self.url[4] = urllib.parse.urlencode(query, doseq=True)
        if self.url[5]:
            self.revision = self.url[5]
            self.url[5] = ''
        # we cannot keep the meta in subdir mode and we can always
        # do a shallow clone
        if self.subdir:
            self.shallow_clone = True
            # we cannot do a shallow clone if we want the commit data
            # for a specific directory
            if self.create_obsinfo and write_obsinfo_with_subdir:
                self.shallow_clone = False
            self.keep_meta = False
        # if we keep the meta files we always want a deep clone
        if self.keep_meta:
            self.shallow_clone = False
        # scmtoolurl is the url we pass to the the scm tool
        scmtoolurl = self.url.copy()
        if scmtoolurl[0] and scmtoolurl[0][0:4] == 'git+':
            scmtoolurl[0] = scmtoolurl[0][4:]
        # we want to switch to ssh as the user is likely providing his
        # access via ssh pub key already.
        # Unfortunality there is no generic way to rewrite the url as
        # it depends on the installation.
        # For now we hard code our instances, but this needs to become
        # configurable or we need to detect it by asking the server
        # somehow.
        # Expect this to be moved to a config file
        if rewrite_url_to_ssh and scmtoolurl[0] == 'https':
            if scmtoolurl[1] == 'src.suse.de':
                scmtoolurl[1] = 'gitea@src.suse.de'
                scmtoolurl[0] = 'ssh'
            elif scmtoolurl[1] == 'src.opensuse.org':
                scmtoolurl[1] = 'gitea@src.opensuse.org'
                scmtoolurl[0] = 'ssh'
        self.scmtoolurl = urllib.parse.urlunparse(scmtoolurl)

    def add_critical_instance(
            self,
            name: str
    ) -> None:
        self.critical_git_servers.append(name)

    def setup_credentials(
            self,
            cred_file: str
    ) -> None:
        self.git_store = tempfile.mkstemp(prefix="obs-scm-bridge-git-cred-store", text=True)
        cmd = [ 'git', 'config', '--global', 'credential.helper', f"store --file {self.git_store[1]}" ]
        self.run_cmd(cmd, fatal="git config credential.helper")

        with open(cred_file, "r", encoding="utf-8") as cred:
            for l in cred.readlines():
                line = l.rstrip()
                entry = line.split(' ')
                if len(entry) != 4:
                    continue

                project = str(os.getenv('OBS_SERVICE_PROJECT'))
                if entry[0] != '*' and not project.startswith(entry[0]):
                    continue

                cmd = [ 'git', 'credential-store', '--file', self.git_store[1], 'store' ]
                proc = subprocess.Popen(cmd,
                                        shell=False,
                                        encoding="utf-8",
                                        stdin=subprocess.PIPE)
                # hostname username token/password
                text=f"protocol=https\nhost={entry[1]}\nusername={entry[2]}\npassword={entry[3]}\n"
                proc.communicate(input=text)
                if proc.returncode != 0:
                    logging.error("Could not setup git credential store")
                    sys.exit(1)

    def run_cmd_nonfatal(
            self,
            cmd: List[str],
            *,
            cwd: Optional[str]=None,
            stdout: Union[int, TextIO]=subprocess.PIPE,
            env: Optional[Dict[str, str]]=None,
    ) -> Tuple[int, str]:
        logging.debug("COMMAND: %s" % cmd)
        stderr = subprocess.PIPE
        if stdout == subprocess.PIPE:
            stderr = subprocess.STDOUT
        proc = subprocess.Popen(cmd,
                                shell=False,
                                stdout=stdout,
                                stderr=stderr,
                                cwd=cwd,
                                env=env)
        std_out = proc.communicate()[0]
        output = std_out.decode() if std_out else ''

        logging.debug("RESULT(%d): %s", proc.returncode, repr(output))
        return (proc.returncode, output)

    def run_cmd(
            self,
            cmd: List[str],
            *,
            fatal: str,
            cwd: Optional[str]=None,
            stdout: Union[int, TextIO]=subprocess.PIPE,
            env: Optional[Dict[str, str]]=None,
    ) -> str:
        returncode, output = self.run_cmd_nonfatal(cmd, cwd=cwd, stdout=stdout, env=env)
        if returncode != 0:
            print("ERROR: " + fatal + " failed: ", output)
            transient_error = False
            for name in self.critical_git_servers:
                if output.find("Failed to connect to " + name) >= 0:
                    transient_error = True
                if output.find("unable to access") >= 0 and output.find(name) >= 0:
                    transient_error = True
            if transient_error:
                print("TRANSIENT ERROR: " + fatal + " failed")
            sys.exit(returncode)
        return output

    def do_set_sparse_checkout(self, outdir: str) -> None:
        self.run_cmd([ 'git', '-C', outdir, 'sparse-checkout', 'set', self.subdir ], fatal="git sparse-checkout")

    def do_checkout(self, outdir: str, branch: str, include_submodules: bool=False) -> None:
        env = {"GIT_LFS_SKIP_SMUDGE": "1", **os.environ} if self.no_lfs else None
        cmd = [ 'git', '-C', outdir, 'checkout', '-q', branch]
        self.run_cmd(cmd, fatal="git checkout", env=env)
        if include_submodules:
            cmd = [ 'git', '-C', outdir, 'submodule', 'init' ]
            self.run_cmd(cmd, fatal="git submodule init", env=env)
            cmd = [ 'git', '-C', outdir, 'submodule', 'update', '--recursive' ]
            if self.subdir:
                cmd += [ self.subdir ]
            self.run_cmd(cmd, fatal="git submodule update", env=env)

    def do_clone_commit(self, outdir: str, include_submodules: bool=False) -> None:
        assert self.revision, "no revision is set but do_clone_commit was called"
        objectformat='--object-format=sha1'
        if len(self.revision) == 64:
            objectformat='--object-format=sha256'
        cmd = [ 'git', 'init', objectformat, outdir ]
        self.run_cmd(cmd, fatal="git init")
        cmd = [ 'git', '-C', outdir, 'remote', 'add', 'origin', self.scmtoolurl ]
        self.run_cmd(cmd, fatal="git remote add origin")
        cmd = [ 'git', '-C', outdir, 'fetch', 'origin', self.revision ]
        if self.shallow_clone:
            cmd += [ '--depth', '1' ]
        if self.subdir:
            cmd += [ '--filter=blob:none' ]
        if include_submodules:
            cmd += [ '--recurse-submodules' ]
        (returncode, output) = self.run_cmd_nonfatal(cmd)
        if returncode != 0:
            # fetch failed, maybe the server refuses to give out unadvertised commits.
            # fall back to a a full fetch
            cmd = [ 'git', '-C', outdir, 'fetch', 'origin' ]
            if include_submodules:
                cmd += [ '--recurse-submodules' ]
            self.run_cmd(cmd, fatal="git fetch")
        if self.subdir:
            self.do_set_sparse_checkout(outdir)
        self.do_checkout(outdir, self.revision, include_submodules=include_submodules)

    def do_clone(self, outdir: str, include_submodules: bool=False) -> None:
        if self.revision and re.match(r"^[0-9a-fA-F]{40,}$", self.revision):
            self.do_clone_commit(outdir, include_submodules=include_submodules)
            return
        cmd = [ 'git', 'clone', self.scmtoolurl, outdir ]
        if include_submodules:
            if self.subdir:
               cmd += [ "--recurse-submodules=" + self.subdir ]
            else:
               cmd += [ '--recurse-submodules' ]
        if self.shallow_clone:
            cmd += [ '--depth', '1' ]
        if self.subdir:
            cmd += [ '--filter=blob:none', '--no-checkout' ]
        if self.revision:
            cmd.insert(2, '-b')
            cmd.insert(3, self.revision)
        env = {"GIT_LFS_SKIP_SMUDGE": "1", **os.environ} if self.no_lfs else None
        self.run_cmd(cmd, fatal="git clone", env=env)
        if self.subdir:
            self.do_set_sparse_checkout(outdir)
            self.do_checkout(outdir, 'HEAD', include_submodules=include_submodules)

    # the _scmsync.obsinfo file might become obsolete again when we store entire
    # git history by default later.
    def write_obsinfo(self) -> None:
        if not self.create_obsinfo:
            # we write it only when importing data into the source server
            return
        if not self.subdir:
            cmd = [ 'git', 'rev-parse', 'HEAD' ]
            line = self.run_cmd(cmd, cwd=self.clonedir, fatal="git rev-parse")
            commit = line.rstrip()
            cmd = [ 'git', 'log', '-n1', '--date=format:%Y%m%d', '--no-show-signature', '--pretty=format:%ct' ]
            line = self.run_cmd(cmd, cwd=self.clonedir, fatal="git log")
            tstamp = line.rstrip()
        else:
            cmd = [ 'git', 'log', '-n1', '--no-show-signature', '--pretty=format:%H %ct', '--', self.subdir]
            line = self.run_cmd(cmd, cwd=self.clonedir, fatal="git log")
            commit, tstamp = line.split()
        infofile = os.path.join(self.outdir, '_scmsync.obsinfo')
        if os.path.lexists(infofile):
            logging.error("ERROR: The _scmsync.obsinfo file must not be part of the git repository.")
            sys.exit(1)
        with open(infofile, 'x') as obsinfo:
            obsinfo.write("mtime: " + tstamp + "\n")
            obsinfo.write("commit: " + commit + "\n")
            if self.scmtoolurl:
                obsinfo.write("url: " + self.scmtoolurl + "\n")
            if self.revision:
                obsinfo.write("revision: " + self.revision + "\n")
            if self.subdir:
                obsinfo.write("subdir: " + self.subdir + "\n")
            if self.projectscmsync:
                obsinfo.write("projectscmsync: " + self.projectscmsync + "\n")

    def check_subdir(self, subdir: str):
        fromdir = os.path.join(self.clonedir, subdir)
        if not os.path.realpath(fromdir+'/').startswith(os.path.realpath(self.clonedir+'/')):
            print("ERROR: subdir " + subdir + " is not below clone directory")
            sys.exit(1)
        if not os.path.isdir(fromdir):
            print("ERROR: subdir " + subdir + " does not exist")
            sys.exit(1)

    def clone(self, include_submodules: bool=False, no_local_link: bool=False, write_service_info: bool=False) -> None:
        if not self.subdir:
            self.clonedir = self.outdir
            self.do_clone(self.outdir, include_submodules=include_submodules)
            self.write_obsinfo()
            if write_service_info:
                self.write_service_info()
            return
        clonedir = tempfile.mkdtemp(prefix="obs-scm-bridge")
        self.clonedir = clonedir
        self.do_clone(clonedir, include_submodules=include_submodules)
        self.check_subdir(self.subdir)

        fromdir = os.path.join(clonedir, self.subdir)
        if os.path.islink(fromdir):
            if no_local_link:
                print("ERROR: local link points to another local link: " + self.subdir)
                sys.exit(1)
            target = os.readlink(fromdir).rstrip('/') # this is no recursive lookup, but is there a usecase?
            if not target or '/' in target or target.startswith('.'):
                print("ERROR: only local links are supported: " + self.subdir)
                sys.exit(1)
            # switch subdir and clone again
            self.subdir=target
            shutil.rmtree(clonedir)
            self.clonedir = None
            self.clone(include_submodules=include_submodules, no_local_link=True, write_service_info=write_service_info)
            return

        if not os.path.isdir(self.outdir):
            os.makedirs(self.outdir)
        if write_obsinfo_with_subdir:
            self.write_obsinfo()
        if write_service_info:
            self.write_service_info()
        for name in os.listdir(fromdir):
            shutil.move(os.path.join(fromdir, name), self.outdir)
        shutil.rmtree(clonedir)
        self.clonedir = None

    def fetch_tags(self) -> None:
        cmd = [ 'git', '-C', self.clonedir, 'fetch', '--tags', 'origin', '+refs/heads/*:refs/remotes/origin/*' ]
        logging.info("fetching all tags")
        self.run_cmd(cmd, fatal="fetch --tags")

    def cpio_directory(self, directory: str) -> None:
        logging.info("create archivefile for %s", directory)
        cmd = [ download_assets, '--create-cpio', '--', directory ]
        with open(directory + '.obscpio', 'x') as archivefile:
            self.run_cmd(cmd, stdout=archivefile, fatal="cpio creation")

    def cpio_specials(self, specials: List[str]) -> None:
        if not specials:
            return
        logging.info("create archivefile for specials")
        cmd = [ download_assets, '--create-cpio', '--', '.' ] + specials
        with open('build.specials.obscpio', 'x') as archivefile:
            self.run_cmd(cmd, stdout=archivefile, fatal="cpio creation")

    def cpio_directories(self) -> None:
        logging.debug("walk via %s", self.outdir)
        os.chdir(self.outdir)
        listing = sorted(os.listdir("."))
        specials = []
        for name in listing:
            if name in ('.git', '.gitattributes') and not self.keep_meta:
                # we do not store git meta data by default to avoid bloat storage
                continue
            if name[0:1] == '.':
                specials.append(name)
                continue
            if os.path.islink(name):
                specials.append(name)
                continue
            if os.path.isdir(name):
                logging.info("CPIO %s ", name)
                self.cpio_directory(name)
                shutil.rmtree(name)
        if specials:
            self.cpio_specials(specials)
            for name in specials:
                if os.path.isdir(name):
                    shutil.rmtree(name)
                else:
                    os.unlink(name)

    def get_assets(self) -> None:
        logging.info("downloading assets")
        cmd = [ download_assets ]
        for arch in self.arch:
            cmd += [ '--arch', arch ]
        if pack_directories:
            cmd += [ '--noassetdir', '--', self.outdir ]
        else:
            cmd += [ '--unpack', '--noassetdir', '--', self.outdir ]
        self.run_cmd(cmd, fatal="asset download")

    def copyfile(self, src: str, dst: str) -> None:
        shutil.copy2(os.path.join(self.outdir, src), os.path.join(self.outdir, dst))

    def export_debian_files(self) -> None:
        if os.path.isfile(self.outdir + "/debian/control") and \
                not os.path.isfile(self.outdir + "/debian.control"):
            self.copyfile("debian/control", "debian.control")
        if os.path.isfile(self.outdir + "/debian/changelog") and \
                not os.path.isfile(self.outdir + "/debian.changelog"):
            self.copyfile("debian/changelog", "debian.changelog")

    def get_debian_origtar(self) -> None:
        if os.path.isfile(self.outdir + "/debian/control"):
            # need up get all tags 
            if not self.subdir:
                self.fetch_tags()
            cmd = [ export_debian_orig_from_git, self.outdir ]
            logging.info("exporting debian origtar")
            self.run_cmd(cmd, fatal="debian origtar export")

    def get_subdir_info(self, path: str) -> str:
        cmd = [ 'git', '-C', self.clonedir, 'ls-tree', '-d', 'HEAD', path ]
        output = self.run_cmd(cmd, fatal="git ls-tree")
        for line in output.splitlines():
            lstree = line.split(maxsplit=4)
            if len(lstree[2]) >= 40:
               return lstree[2]
        logging.error(f"ERROR: could not determine tree info of {path}")
        sys.exit(1)

    def write_info_file(self, filename: str, info: str) -> None:
        if not filename.startswith("/"):
            filename = self.outdir + "/" + filename
        if os.path.lexists(filename):
            logging.error(f"ERROR: The {filename} file must not be part of the git repository.")
            sys.exit(1)
        with open(filename, 'x') as infofile:
            infofile.write(info + '\n')

    def write_service_info(self) -> None:
        info = None
        if self.subdir:
            info = self.get_subdir_info(self.subdir)
        else:
            cmd = [ 'git', '-C', self.clonedir, 'rev-parse', 'HEAD' ]
            info = self.run_cmd(cmd, fatal="git rev-parse HEAD")
            info = info.strip()
        if info:
            self.write_info_file(os.path.join(self.outdir, "_service_info"), info)

    def write_package_xml_file(self, name: str, url: str, projectscmsync: str=None) -> None:
        if self.onlybuild and not name in self.onlybuild.keys():
            return
        filename = f"{self.outdir}/{name}.xml"
        with open(filename, 'x') as xmlfile:
            xmlfile.write(f"""<package name="{escape(name)}">\n""")
            if self.enforce_bcntsynctag:
                xmlfile.write(f"""<bcntsynctag>{escape(name)}</bcntsynctag>\n""")
            if projectscmsync:
                xmlfile.write(f"""<url>{escape(projectscmsync)}</url>\n""")
            xmlfile.write(f"""<scmsync>{escape(url)}</scmsync>\n</package>\n""")

    def write_package_xml_local_link(self, name: str, target: str, projectscmsync: str=None) -> None:
        filename = f"{self.outdir}/{name}.xml"
        with open(filename, 'x') as xmlfile:
            xmlfile.write(f"""<package name="{escape(name)}">""")
            if self.enforce_bcntsynctag:
                xmlfile.write(f"""<bcntsynctag>{escape(name)}</bcntsynctag>""")
            if projectscmsync:
                xmlfile.write(f"""<url>{escape(projectscmsync)}</url>\n""")
            xmlfile.write(f"""</package>""")
        filename = f"{self.outdir}/{name}.link"
        with open(filename, 'x') as linkfile:
            linkfile.write(f"""<link package="{escape(target)}" cicount="copy" />""")

    def list_submodule_revisions(self, subdir: str) -> Dict[str, str]:
        revisions = {}
        cmd = [ 'git', '-C', self.clonedir, 'ls-tree', 'HEAD', (subdir if subdir else '.') ]
        output = self.run_cmd(cmd, fatal="git ls-tree")
        for line in output.splitlines():
            lstree = line.split(maxsplit=4)
            if lstree[1] == 'commit' and len(lstree[2]) >= 40:
                revisions[lstree[3]] = lstree[2]
        return revisions

    def process_package_submodule(self, gsmsection: configparser.SectionProxy, name: str, revision: str) -> None:
        if not self._REGEXP.match(name):
            logging.warn("submodule name contains invalid char: %s", name)
            return

        path = gsmsection['path']
        urlstr = gsmsection['url']

        # write xml file and register the module
        url = list(urllib.parse.urlparse(urlstr))
        url[5] = revision
        if self.arch:
            query = urllib.parse.parse_qs(url[4], keep_blank_values=True);
            query['arch'] = self.arch
            url[4] = urllib.parse.urlencode(query, doseq=True)
        if self.add_noobsinfo:
            query = urllib.parse.parse_qs(url[4], keep_blank_values=True);
            query['noobsinfo'] = [ '1' ]
            url[4] = urllib.parse.urlencode(query, doseq=True)

        # handle relative urls in submodules
        unparsed_url = urllib.parse.urlunparse(url)
        if ".." == unparsed_url[0:2]:
            # need to append a '/' to the base url so that the relative
            # path is properly resolved, otherwise we might descend one
            # directory too far
            unparsed_url = urllib.parse.urljoin(self.scmtoolurl+'/', unparsed_url)
        if self.url[0][0:4] == 'git+':
            unparsed_url = 'git+' + unparsed_url

        projecturl = self.url.copy()
        # replace the fragment with the checked out commit id
        cmd = [ 'git', '-C', self.clonedir, 'rev-parse', 'HEAD' ]
        line = self.run_cmd(cmd, fatal="git rev-parse HEAD")
        projecturl[5] = line.rstrip()
        projectscmsync = urllib.parse.urlunparse(projecturl)

        self.write_package_xml_file(name, unparsed_url, projectscmsync)
        self.write_info_file(name + ".info", revision)

    def process_package_subdirectory(self, name: str, subdir: str) -> None:
        # current directory is self.outdir
        if not self._REGEXP.match(name):
            logging.warn("directory name contains invalid char: %s", name)
            return

        # add subdir info file
        info = self.get_subdir_info(subdir + name)
        self.write_info_file(name + ".info", info)

        # add subdir parameter to url
        url = self.url.copy()
        query = urllib.parse.parse_qs(url[4], keep_blank_values=True)
        query['subdir'] = subdir + name
        url[4] = urllib.parse.urlencode(query, doseq=True)
        if self.revision:
            url[5] = self.revision
        if self.arch:
            query = urllib.parse.parse_qs(url[4], keep_blank_values=True);
            query['arch'] = self.arch
            url[4] = urllib.parse.urlencode(query, doseq=True)
        if self.add_noobsinfo:
            query = urllib.parse.parse_qs(url[4], keep_blank_values=True);
            query['noobsinfo'] = 1
            url[4] = urllib.parse.urlencode(query, doseq=True)

        self.write_package_xml_file(name, urllib.parse.urlunparse(url))

    def process_package_locallink(self, name: str, target: str) -> None:
        if not self._REGEXP.match(name):
            logging.warn("locallink name contains invalid char: %s", name)
            return
        if not self._REGEXP.match(target):
            logging.warn("locallink target contains invalid char: %s", target)
            return
        self.write_package_xml_local_link(name, target)

    def generate_project_files(self) -> None:
        clonedir = tempfile.mkdtemp(prefix="obs-scm-bridge")
        self.clonedir = clonedir
        self.do_clone(clonedir)
        if self.subdir:
            self.check_subdir(self.subdir)
        if not os.path.isdir(self.outdir):
            os.makedirs(self.outdir)
        if not self.subdir or write_obsinfo_with_subdir:
            self.write_obsinfo()
        os.chdir(self.outdir)
        subdir = self.subdir + '/' if self.subdir else ''
        if os.path.isfile(clonedir + '/.gitmodules'):
            self.gsmconfig = configparser.ConfigParser()
            self.gsmconfig.read(clonedir + '/.gitmodules')
        self.generate_package_xml_files_of_directory(subdir)
        shutil.rmtree(clonedir)
        self.clonedir = None

    def generate_package_xml_files_of_directory_submodules(self, subdir) -> None:
        revisions = None
        for section in self.gsmconfig.sections():
            gsmconfig = self.gsmconfig[section]
            if not 'path' in gsmconfig:
                logging.warn("path not defined for git submodule " + section)
                continue
            name = gsmconfig['path']
            if subdir:
                if not name.startswith(subdir):
                     continue
                name = name.removeprefix(subdir)
            if '/' in name:
                continue
            if name in self.processed:
                continue                # already handled
            if not 'url' in gsmconfig:
                logging.error("url not defined for submodule " + section)
                sys.exit(1)
            if not revisions:
                revisions = self.list_submodule_revisions(subdir)
            revision = revisions.get(gsmconfig['path'], None)
            if not revision:
                logging.error("Could not determine revision of submodule for " + gsmconfig['path'])
                sys.exit(1)
            self.process_package_submodule(gsmconfig, name, revision)
            self.processed[name] = True

    def generate_package_xml_files_of_directory(self, subdir) -> None:
        directory = (self.clonedir + '/' + subdir).rstrip('/')
        logging.debug("check %s (subdir=%s)", directory, subdir)

        if os.path.isfile(directory + '/_config'):
            shutil.move(directory + '/_config', '.')
            self.processed['_config'] = True

        if os.path.isfile(directory + '/_subdirs'):
            subdir_yml = None
            with open(directory + '/_subdirs') as stream:
                subdir_yml = yaml.safe_load(stream)
            for newsubdir in subdir_yml['subdirs']:
                if not newsubdir:
                    continue
                if (subdir + newsubdir + '/') in self.processed:
                    continue
                self.processed[subdir + newsubdir + '/'] = True
                self.generate_package_xml_files_of_directory(subdir + newsubdir + '/')
            if 'toplevel' not in subdir_yml or subdir_yml['toplevel'] != 'include':
                return

        # process all submodules in this directory
        if self.gsmconfig:
            self.generate_package_xml_files_of_directory_submodules(subdir)

        # handle plain files and directories
        logging.debug("walk via %s", directory)
        listing = sorted(os.listdir(directory))
        for name in listing:
            if name in self.processed:
                continue                # already handled
            fname = directory + '/' + name
            if name == '.git':
                if self.keep_meta and subdir == '':
                    shutil.move(fname, '.')
                    self.processed[name] = True
            elif os.path.islink(fname):
                target = os.readlink(fname).rstrip('/') # this is no recursive lookup, but is there a usecase?
                if not target or '/' in target or target.startswith('.'):
                    logging.warn("only local links are supported, skipping: " + name)
                    continue
                if not os.path.isdir(directory + '/' + target):
                    logging.debug("skipping dangling symlink %s -> %s", name, target)
                    continue
                self.process_package_locallink(name, target)
                self.processed[name] = True
            elif os.path.isdir(fname):
                if (subdir + name + '/') in self.processed:
                    continue            # already handled in _subdir loop
                self.process_package_subdirectory(name, subdir)
                self.processed[name] = True

if __name__ == '__main__':

    parser = argparse.ArgumentParser(
        description='Open Build Service source service for managing packaging files in git.'
        'This is a special service for OBS git integration.')
    parser.add_argument('--outdir', required=True,
                        help='output directory for modified sources',
                        nargs=1,
                        type=str)
    parser.add_argument('--url',
                        help='REQUIRED: url to git repository',
                        required=True,
                        nargs=1,
                        type=str)
    parser.add_argument('--projectmode',
                        help='just return the package list based on the subdirectories')
    parser.add_argument('--projectscmsync',
                        help='add also reference information of a project git for a package clone')
    parser.add_argument('--debug',
                        help='verbose debug mode')
    args = vars(parser.parse_args())

    url = args['url'][0]
    outdir = args['outdir'][0]
    project_mode = args['projectmode']
    projectscmsync = args['projectscmsync']

    if args['debug']:
        logging.getLogger().setLevel(logging.DEBUG)
        logging.debug("Running in debug mode")

    # workflow
    obsgit = ObsGit(outdir, url, projectscmsync)
    obsgit.add_critical_instance("src.opensuse.org")
    if os.path.isfile(critical_instances_config):
       with open(critical_instances_config) as conf:
           for line in conf.readlines():
               obsgit.add_critical_instance(line.rstrip())
    if os.path.isfile(credentials_config):
        obsgit.setup_credentials(credentials_config)

    if project_mode == 'true' or project_mode == '1':
        obsgit.generate_project_files()
        sys.exit(0)

    obsgit.clone(include_submodules=True, write_service_info=pack_directories)

    if get_assets:
        obsgit.get_assets()
        obsgit.get_debian_origtar()
    if pack_directories:
        obsgit.export_debian_files()
        obsgit.cpio_directories()

