#!/usr/bin/python3
# -*- coding: utf-8 -*-

# scm (only git atm) cloning and packaging for Open Build Service
# 
# (C) 2021 by Adrian Schröter <adrian@suse.de>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# See http://www.gnu.org/licenses/gpl-2.0.html for full license text.

import argparse
import os
import re
import shutil
import sys
import logging
import subprocess
import tempfile
import yaml
from html import escape
from typing import Dict, List, Optional, Set, TextIO, Tuple, Union
import urllib.parse
import configparser

# External helper executables invoked by this script.
download_assets = '/usr/lib/build/download_assets'
export_debian_orig_from_git = '/usr/lib/build/export_debian_orig_from_git'

# Optional configuration files read by the command line entry point.
critical_instances_config = "/etc/obs/services/scm-bridge/critical-instances"
credentials_config = "/etc/obs/services/scm-bridge/credentials"

if os.environ.get('DEBUG_SCM_BRIDGE') == "1":
    logging.getLogger().setLevel(logging.DEBUG)

# The behaviour depends on who is calling us: the OBS source service daemon
# and/or the osc command line client.  The osc settings win when both
# environment variables are set.
_called_by_service_daemon = bool(os.environ.get('OBS_SERVICE_DAEMON'))
_called_by_osc = bool(os.environ.get('OSC_VERSION'))

# pack directories into cpio archives (service daemon only)
pack_directories = _called_by_service_daemon
# download the assets referenced by the sources
get_assets = _called_by_service_daemon or _called_by_osc
# osc checkouts get the full history, everything else a shallow clone
shallow_clone = not _called_by_osc
# the _scmsync.obsinfo file is only written when importing into the server
create_obsinfo = _called_by_service_daemon and not _called_by_osc
# osc users most likely have ssh access configured already
rewrite_url_to_ssh = _called_by_osc
# also write _scmsync.obsinfo when only a sub directory is exported
write_obsinfo_with_subdir = True

# make the output of the called tools predictable
os.environ['LANG'] = "C"

class ObsGit(object):
    """Clone a git repository and export its content into an output directory.

    The class offers two modes: cloning a single package (optionally only
    one sub directory of the repository) and a project mode that writes one
    package description per sub directory or top level git submodule.
    """

    # Pattern a directory name must match to be exported as a package in
    # project mode (see process_package_subdirectory).
    _REGEXP = re.compile(r"^[a-zA-Z0-9\.\-\_\+]*$");

    def __init__(self, outdir: str, url: str, projectscmsync: str) -> None:
        """Split the scmsync *url* into clone settings and the plain git url.

        The query parameters understood by the bridge (subdir, arch,
        enforce_bcntsynctag, keepmeta, lfs, onlybuild, noobsinfo) are removed
        from the url and stored as attributes; the url fragment becomes the
        revision to check out.

        outdir is the directory receiving the exported files and
        projectscmsync is a reference to the project git which gets recorded
        in the generated meta data when set.
        """
        self.outdir = outdir
        self.projectscmsync = projectscmsync
        self.url = list(urllib.parse.urlparse(url))
        self.clonedir = None
        self.revision = None
        self.subdir = None
        self.arch = []
        self.onlybuild = None
        self.critical_git_servers = []
        self.git_store = None
        self.no_lfs = False
        self.add_noobsinfo = False
        self.shallow_clone = shallow_clone
        self.create_obsinfo = create_obsinfo
        # state which is only used in project level mode
        self.export_files = {"_config", "_scmsync.obsinfo"}
        self.gitsubmodules: Set[str] = set()
        self.revisions: Optional[Dict[str, str]] = None
        self.processed = {}

        # take our own parameters out of the query string
        params = urllib.parse.parse_qs(self.url[4], keep_blank_values=True)
        bridge_keys = ('subdir', 'arch', 'enforce_bcntsynctag', 'keepmeta',
                       'lfs', 'onlybuild', 'noobsinfo')
        ours = {key: params.pop(key) for key in bridge_keys if key in params}
        if 'subdir' in ours:
            self.subdir = ours['subdir'][0]
        if 'arch' in ours:
            self.arch = ours['arch']
        self.enforce_bcntsynctag = 'enforce_bcntsynctag' in ours
        self.keep_meta = 'keepmeta' in ours
        if 'lfs' in ours:
            self.no_lfs = ours['lfs'] == ["0"]
        if 'onlybuild' in ours:
            self.onlybuild = dict.fromkeys(ours['onlybuild'], 1)
        if 'noobsinfo' in ours:
            self.add_noobsinfo = True
            self.create_obsinfo = False
        if ours:
            # only touch the query string when we removed something from it
            self.url[4] = urllib.parse.urlencode(params, doseq=True)

        # the fragment selects the revision
        if self.url[5]:
            self.revision = self.url[5]
            self.url[5] = ''

        if self.subdir:
            # The meta data cannot be kept in subdir mode.  A shallow clone
            # is sufficient unless the commit data of the specific directory
            # is needed for the obsinfo file.
            self.shallow_clone = not (self.create_obsinfo and write_obsinfo_with_subdir)
            self.keep_meta = False
        elif self.keep_meta:
            # keeping the meta files always requires a deep clone
            self.shallow_clone = False

        # scmtoolurl is the url we pass to the scm tool
        scmtoolurl = self.url.copy()
        if scmtoolurl[0].startswith('git+'):
            scmtoolurl[0] = scmtoolurl[0][4:]
        # Switch to ssh as the user is likely providing his access via ssh
        # pub key already.  Unfortunately there is no generic way to rewrite
        # the url as it depends on the installation, so the known instances
        # are hard coded for now.  Expect this to be moved to a config file.
        if (rewrite_url_to_ssh and scmtoolurl[0] == 'https'
                and scmtoolurl[1] in ('src.suse.de', 'src.opensuse.org')):
            scmtoolurl[1] = 'gitea@' + scmtoolurl[1]
            scmtoolurl[0] = 'ssh'
        self.scmtoolurl = urllib.parse.urlunparse(scmtoolurl)

    def add_critical_instance(self, name: str) -> None:
        """Register *name* as a git server whose failures count as transient."""
        self.critical_git_servers += [name]

    def setup_credentials(
            self,
            cred_file: str
    ) -> None:
        self.git_store = tempfile.mkstemp(prefix="obs-scm-bridge-git-cred-store", text=True)
        cmd = [ 'git', 'config', '--global', 'credential.helper', f"store --file {self.git_store[1]}" ]
        self.run_cmd(cmd, fatal="git config credential.helper")

        with open(cred_file, "r", encoding="utf-8") as cred:
            for l in cred.readlines():
                line = l.rstrip()
                entry = line.split(' ')
                if len(entry) != 4:
                    continue

                project = str(os.getenv('OBS_SERVICE_PROJECT'))
                if entry[0] != '*' and not project.startswith(entry[0]):
                    continue

                cmd = [ 'git', 'credential-store', '--file', self.git_store[1], 'store' ]
                proc = subprocess.Popen(cmd,
                                        shell=False,
                                        encoding="utf-8",
                                        stdin=subprocess.PIPE)
                # hostname username token/password
                text=f"protocol=https\nhost={entry[1]}\nusername={entry[2]}\npassword={entry[3]}\n"
                proc.communicate(input=text)
                if proc.returncode != 0:
                    logging.error("Could not setup git credential store")
                    sys.exit(1)

    def run_cmd_nonfatal(
            self,
            cmd: List[str],
            *,
            cwd: Optional[str]=None,
            stdout: Union[int, TextIO]=subprocess.PIPE,
            env: Optional[Dict[str, str]]=None,
    ) -> Tuple[int, str]:
        logging.debug("COMMAND: %s" % cmd)
        stderr = subprocess.PIPE
        if stdout == subprocess.PIPE:
            stderr = subprocess.STDOUT
        proc = subprocess.Popen(cmd,
                                shell=False,
                                stdout=stdout,
                                stderr=stderr,
                                cwd=cwd,
                                env=env)
        std_out = proc.communicate()[0]
        output = std_out.decode() if std_out else ''

        logging.debug("RESULT(%d): %s", proc.returncode, repr(output))
        return (proc.returncode, output)

    def run_cmd(
            self,
            cmd: List[str],
            *,
            fatal: str,
            cwd: Optional[str]=None,
            stdout: Union[int, TextIO]=subprocess.PIPE,
            env: Optional[Dict[str, str]]=None,
    ) -> str:
        returncode, output = self.run_cmd_nonfatal(cmd, cwd=cwd, stdout=stdout, env=env)
        if returncode != 0:
            print("ERROR: " + fatal + " failed: ", output)
            transient_error = False
            for name in self.critical_git_servers:
                if output.find("Failed to connect to " + name) >= 0:
                    transient_error = True
                if output.find("unable to access") >= 0 and output.find(name) >= 0:
                    transient_error = True
            if transient_error:
                print("TRANSIENT ERROR: " + fatal + " failed")
            sys.exit(returncode)
        return output

    def do_set_sparse_checkout(self, outdir: str) -> None:
        """Restrict the working tree of the clone in *outdir* to self.subdir."""
        sparse_cmd = [ 'git', '-C', outdir, 'sparse-checkout', 'set' ]
        sparse_cmd.append(self.subdir)
        self.run_cmd(sparse_cmd, fatal="git sparse-checkout")

    def do_checkout(self, outdir: str, branch: str, include_submodules: bool=False) -> None:
        env = {"GIT_LFS_SKIP_SMUDGE": "1", **os.environ} if self.no_lfs else None
        cmd = [ 'git', '-C', outdir, 'checkout', '-q', branch]
        self.run_cmd(cmd, fatal="git checkout", env=env)
        if include_submodules:
            cmd = [ 'git', '-C', outdir, 'submodule', 'init' ]
            self.run_cmd(cmd, fatal="git submodule init", env=env)
            cmd = [ 'git', '-C', outdir, 'submodule', 'update' ]
            self.run_cmd(cmd, fatal="git submodule update", env=env)

    def do_clone_commit(self, outdir: str, include_submodules: bool=False) -> None:
        """Fetch and check out the commit self.revision into *outdir*.

        A fresh repository is initialized and only the requested commit is
        fetched from the remote.  A revision of 64 characters selects the
        sha256 object format, everything else sha1.
        """
        assert self.revision, "no revision is set but do_clone_commit was called"
        hash_name = 'sha256' if len(self.revision) == 64 else 'sha1'
        self.run_cmd([ 'git', 'init', '--object-format=' + hash_name, outdir ], fatal="git init")
        self.run_cmd([ 'git', '-C', outdir, 'remote', 'add', 'origin', self.scmtoolurl ],
                     fatal="git remote add origin")

        fetch = [ 'git', '-C', outdir, 'fetch', 'origin', self.revision ]
        if self.shallow_clone:
            fetch.extend(('--depth', '1'))
        if self.subdir:
            fetch.append('--filter=blob:none')

        fetch_needed = True
        if include_submodules:
            if self.subdir:
                # try to select the specific submodule first when a subdir
                # is given, fall back to all submodules if that fails
                status, _ = self.run_cmd_nonfatal(fetch + [ "--recurse-submodules=" + self.subdir ])
                fetch_needed = status != 0
            if fetch_needed:
                fetch.append('--recurse-submodules')
        if fetch_needed:
            self.run_cmd(fetch, fatal="git fetch")

        if self.subdir:
            self.do_set_sparse_checkout(outdir)
        self.do_checkout(outdir, self.revision, include_submodules=include_submodules)

    def do_clone(self, outdir: str, include_submodules: bool=False) -> None:
        """Clone the repository into *outdir*.

        A revision which looks like a full commit id is handled by
        do_clone_commit().  Otherwise the revision, when set, is used as
        branch name for the clone.  In subdir mode the clone is created
        without a checkout and only the sub directory is checked out
        afterwards.
        """
        if self.revision and re.match(r"^[0-9a-fA-F]{40,}$", self.revision):
            self.do_clone_commit(outdir, include_submodules=include_submodules)
            return

        clone_cmd = [ 'git', 'clone' ]
        if self.revision:
            clone_cmd += [ '-b', self.revision ]
        clone_cmd += [ self.scmtoolurl, outdir ]
        if include_submodules:
            submodule_option = '--recurse-submodules'
            if self.subdir:
                submodule_option += '=' + self.subdir
            clone_cmd.append(submodule_option)
        if self.shallow_clone:
            clone_cmd += [ '--depth', '1' ]
        if self.subdir:
            clone_cmd += [ '--filter=blob:none', '--no-checkout' ]

        clone_env = {"GIT_LFS_SKIP_SMUDGE": "1", **os.environ} if self.no_lfs else None
        self.run_cmd(clone_cmd, fatal="git clone", env=clone_env)
        if not self.subdir:
            return
        self.do_set_sparse_checkout(outdir)
        self.do_checkout(outdir, 'HEAD', include_submodules=include_submodules)

    # the _scmsync.obsinfo file might become obsolete again when we store entire
    # git history by default later.
    def write_obsinfo(self) -> None:
        if not self.create_obsinfo:
            # we write it only when importing data into the source server
            return
        if not self.subdir:
            cmd = [ 'git', 'rev-parse', 'HEAD' ]
            line = self.run_cmd(cmd, cwd=self.clonedir, fatal="git rev-parse")
            commit = line.rstrip()
            cmd = [ 'git', 'log', '-n1', '--date=format:%Y%m%d', '--no-show-signature', '--pretty=format:%ct' ]
            line = self.run_cmd(cmd, cwd=self.clonedir, fatal="git rev-parse")
            tstamp = line.rstrip()
        else:
            cmd = [ 'git', 'log', '-n1', '--no-show-signature', '--pretty=format:%H %ct', '--', self.subdir]
            line = self.run_cmd(cmd, cwd=self.clonedir, fatal="git log")
            commit, tstamp = line.split()
        infofile = os.path.join(self.outdir, '_scmsync.obsinfo')
        if os.path.lexists(infofile):
            logging.error("ERROR: The _scmsync.obsinfo file must not be part of the git repository.")
            sys.exit(1)
        with open(infofile, 'x') as obsinfo:
            obsinfo.write("mtime: " + tstamp + "\n")
            obsinfo.write("commit: " + commit + "\n")
            if self.scmtoolurl:
                obsinfo.write("url: " + self.scmtoolurl + "\n")
            if self.revision:
                obsinfo.write("revision: " + self.revision + "\n")
            if self.subdir:
                obsinfo.write("subdir: " + self.subdir + "\n")
            if self.projectscmsync:
                obsinfo.write("projectscmsync: " + self.projectscmsync + "\n")

    def check_subdir(self, subdir: str):
        fromdir = os.path.join(self.clonedir, subdir)
        if not os.path.realpath(fromdir+'/').startswith(os.path.realpath(self.clonedir+'/')):
            print("ERROR: subdir " + subdir + " is not below clone directory")
            sys.exit(1)
        if not os.path.isdir(fromdir):
            print("ERROR: subdir " + subdir + " does not exist")
            sys.exit(1)

    def clone(self, include_submodules: bool=False, no_local_link: bool=False, write_service_info: bool=False) -> None:
        """Clone the repository and place the requested content in outdir.

        Without a subdir the repository is cloned directly into the output
        directory.  With a subdir the clone is done in a temporary directory
        and only the content of the sub directory is moved to the output
        directory.  A sub directory which is a symbolic link to a sibling
        directory is followed once by cloning again with the link target;
        no_local_link is set for that second run to reject a link pointing
        to another link.

        write_service_info requests the creation of the _service_info file.
        """
        if not self.subdir:
            self.clonedir = self.outdir
            self.do_clone(self.outdir, include_submodules=include_submodules)
            self.write_obsinfo()
            if write_service_info:
                self.write_service_info()
            return

        workdir = tempfile.mkdtemp(prefix="obs-scm-bridge")
        self.clonedir = workdir
        self.do_clone(workdir, include_submodules=include_submodules)
        self.check_subdir(self.subdir)
        source = os.path.join(workdir, self.subdir)

        if os.path.islink(source):
            if no_local_link:
                print("ERROR: local link points to another local link: " + self.subdir)
                sys.exit(1)
            # this is no recursive lookup, but is there a usecase?
            link_target = os.readlink(source).rstrip('/')
            is_local = bool(link_target) and '/' not in link_target and not link_target.startswith('.')
            if not is_local:
                print("ERROR: only local links are supported: " + self.subdir)
                sys.exit(1)
            # switch subdir and clone again
            self.subdir = link_target
            shutil.rmtree(workdir)
            self.clonedir = None
            self.clone(include_submodules=include_submodules, no_local_link=True, write_service_info=write_service_info)
            return

        if not os.path.isdir(self.outdir):
            os.makedirs(self.outdir)
        if write_obsinfo_with_subdir:
            self.write_obsinfo()
        if write_service_info:
            self.write_service_info()
        for entry in os.listdir(source):
            shutil.move(os.path.join(source, entry), self.outdir)
        shutil.rmtree(workdir)
        self.clonedir = None

    def fetch_tags(self) -> None:
        """Fetch all tags and remote branches into the clone in outdir."""
        refspec = '+refs/heads/*:refs/remotes/origin/*'
        fetch_cmd = [ 'git', '-C', self.outdir, 'fetch', '--tags', 'origin', refspec ]
        logging.info("fetching all tags")
        self.run_cmd(fetch_cmd, fatal="fetch --tags")

    def cpio_directory(self, directory: str) -> None:
        """Pack *directory* into the new archive file <directory>.obscpio.

        The archive is produced by the download_assets helper, whose output
        is written into the archive file.  The archive file must not exist
        yet.
        """
        logging.info("create archivefile for %s", directory)
        archive_name = directory + '.obscpio'
        pack_cmd = [ download_assets, '--create-cpio', '--', directory ]
        with open(archive_name, 'x') as archivefile:
            self.run_cmd(pack_cmd, stdout=archivefile, fatal="cpio creation")

    def cpio_specials(self, specials: List[str]) -> None:
        """Pack the entries listed in *specials* into build.specials.obscpio.

        The archive is created in the current working directory by the
        download_assets helper.  Nothing happens for an empty list.
        """
        if specials:
            logging.info("create archivefile for specials")
            pack_cmd = [ download_assets, '--create-cpio', '--', '.' ]
            pack_cmd.extend(specials)
            with open('build.specials.obscpio', 'x') as archivefile:
                self.run_cmd(pack_cmd, stdout=archivefile, fatal="cpio creation")

    def cpio_directories(self) -> None:
        logging.debug("walk via %s", self.outdir)
        os.chdir(self.outdir)
        listing = sorted(os.listdir("."))
        specials = []
        for name in listing:
            if name in ('.git', '.gitattributes') and not self.keep_meta:
                # we do not store git meta data by default to avoid bloat storage
                continue
            if name[0:1] == '.':
                specials.append(name)
                continue
            if os.path.islink(name):
                specials.append(name)
                continue
            if os.path.isdir(name):
                logging.info("CPIO %s ", name)
                self.cpio_directory(name)
                shutil.rmtree(name)
        if specials:
            self.cpio_specials(specials)
            for name in specials:
                if os.path.isdir(name):
                    shutil.rmtree(name)
                else:
                    os.unlink(name)

    def get_assets(self) -> None:
        """Download the assets of the sources in outdir.

        Runs the download_assets helper for the configured architectures.
        The assets get unpacked unless the directories are going to be
        packed into archives.
        """
        logging.info("downloading assets")
        arch_args = [arg for arch in self.arch for arg in ('--arch', arch)]
        mode_args = [] if pack_directories else [ '--unpack' ]
        cmd = [ download_assets ] + arch_args + mode_args + [ '--noassetdir', '--', self.outdir ]
        self.run_cmd(cmd, fatal="asset download")

    def copyfile(self, src: str, dst: str) -> None:
        shutil.copy2(os.path.join(self.outdir, src), os.path.join(self.outdir, dst))

    def export_debian_files(self) -> None:
        if os.path.isfile(self.outdir + "/debian/control") and \
                not os.path.isfile(self.outdir + "/debian.control"):
            self.copyfile("debian/control", "debian.control")
        if os.path.isfile(self.outdir + "/debian/changelog") and \
                not os.path.isfile(self.outdir + "/debian.changelog"):
            self.copyfile("debian/changelog", "debian.changelog")

    def get_debian_origtar(self) -> None:
        if os.path.isfile(self.outdir + "/debian/control"):
            # need up get all tags 
            if not self.subdir:
                self.fetch_tags()
            cmd = [ export_debian_orig_from_git, self.outdir ]
            logging.info("exporting debian origtar")
            self.run_cmd(cmd, fatal="debian origtar export")

    def get_subdir_info(self, path: str) -> str:
        """Return the git object id of the directory *path* in HEAD.

        The id is taken from the first "git ls-tree -d" output line whose
        third field looks like an object id.  Lines which are no ls-tree
        entries (the command output also contains stderr, e.g. warnings)
        are skipped.  Exits the process when no id can be found.
        """
        cmd = [ 'git', '-C', self.clonedir, 'ls-tree', '-d', 'HEAD', path ]
        output = self.run_cmd(cmd, fatal="git ls-tree")
        for line in output.splitlines():
            # format: <mode> <type> <object>\t<path>
            fields = line.split(maxsplit=3)
            if len(fields) >= 3 and len(fields[2]) >= 40:
                return fields[2]
        logging.error(f"ERROR: could not determine tree info of {path}")
        sys.exit(1)

    def write_info_file(self, filename: str, info: str) -> None:
        if not filename.startswith("/"):
            filename = self.outdir + "/" + filename
        if os.path.lexists(filename):
            logging.error(f"ERROR: The {filename} file must not be part of the git repository.")
            sys.exit(1)
        with open(filename, 'x') as infofile:
            infofile.write(info + '\n')

    def write_service_info(self) -> None:
        info = None
        if self.subdir:
            #info = self.get_subdir_info(self.outdir, '')
            info = self.get_subdir_info(self.subdir)
        else:
            cmd = [ 'git', '-C', self.clonedir, 'show', '-s', '--pretty=format:%H', 'HEAD' ]
            info = self.run_cmd(cmd, fatal="git show -s HEAD")
            info = info.strip()
        if info:
            self.write_info_file(os.path.join(self.outdir, "_service_info"), info)

    def write_package_xml_file(self, name: str, url: str, projectscmsync: str=None) -> None:
        projecturlxml=''
        if self.onlybuild and not name in self.onlybuild.keys():
            return
        filename = f"{self.outdir}/{name}.xml"
        if os.path.lexists(filename):
            logging.error(f"ERROR: The {name}.xml file must not be part of the git repository.")
            sys.exit(1)
        with open(filename, 'x') as xmlfile:
            xmlfile.write(f"""<package name="{escape(name)}">\n""")
            if self.enforce_bcntsynctag:
                xmlfile.write(f"""<bcntsynctag>{escape(name)}</bcntsynctag>\n""")
            if projectscmsync:
                xmlfile.write(f"""<url>{escape(projectscmsync)}</url>\n""")
            xmlfile.write(f"""<scmsync>{escape(url)}</scmsync>\n</package>\n""")

    def write_package_xml_local_link(self, target: str, name: str, projectscmsync: str=None) -> None:
        filename = f"{self.outdir}/{name}.xml"
        if os.path.lexists(filename):
            logging.error(f"ERROR: The {name}.xml file must not be part of the git repository.")
            sys.exit(1)
        with open(filename, 'x') as xmlfile:
            xmlfile.write(f"""<package name="{escape(name)}">""")
            if self.enforce_bcntsynctag:
                xmlfile.write(f"""<bcntsynctag>{escape(name)}</bcntsynctag>""")
            if projectscmsync:
                xmlfile.write(f"""<url>{escape(projectscmsync)}</url>\n""")
            xmlfile.write(f"""</package>""")
        self.export_files.add(name + ".xml")
        filename = f"{self.outdir}/{name}.link"
        if os.path.lexists(filename):
            logging.error(f"ERROR: The {name}.link file must not be part of the git repository.")
            sys.exit(1)
        with open(filename, 'x') as linkfile:
            linkfile.write(f"""<link package="{escape(target)}" cicount="copy" />""")
        self.export_files.add(name + ".link")

    def list_submodule_revisions(self) -> None:
        """Collect the commit ids of the top level git submodules of HEAD.

        The result is stored in self.revisions as mapping of the submodule
        path to its commit id.
        """
        self.revisions = {}
        cmd = [ 'git', '-C', self.clonedir, 'ls-tree', 'HEAD', '.' ]
        output = self.run_cmd(cmd, fatal="git ls-tree")
        for line in output.splitlines():
            # format: <mode> <type> <object>\t<path>
            # Split only three times to keep white spaces inside of the
            # path.  Lines which are no ls-tree entries (the output also
            # contains stderr) have less fields and are skipped.
            fields = line.split(maxsplit=3)
            if len(fields) == 4 and fields[1] == 'commit' and len(fields[2]) >= 40:
                self.revisions[fields[3]] = fields[2]

    def process_package_submodule(self, gsmsection: configparser.SectionProxy, package_name: Optional[str]=None) -> None:
        path = gsmsection['path']
        urlstr = gsmsection['url']
        if not package_name:
            package_name = path

        if package_name in self.processed:
            return

        if '/' in path:
            # we handle only top level submodules in project mode
            return

        # find revision of submodule
        if not self.revisions:
            self.list_submodule_revisions()
        assert self.revisions, "self.revisions must not be None after calling self.list_submodule_revisions"
        revision = self.revisions.get(path, None)
        if not revision:
            logging.error("Could not determine revision of submodule for " + path)
            sys.exit(1)

        # all good, write xml file and register the module
        self.gitsubmodules.add(path)
        url = list(urllib.parse.urlparse(urlstr))
        url[5] = revision
        if self.arch:
            query = urllib.parse.parse_qs(url[4], keep_blank_values=True);
            query['arch'] = self.arch
            url[4] = urllib.parse.urlencode(query, doseq=True)
        if self.add_noobsinfo:
            query = urllib.parse.parse_qs(url[4], keep_blank_values=True);
            query['noobsinfo'] = [ '1' ]
            url[4] = urllib.parse.urlencode(query, doseq=True)

        # handle relative urls in submodules
        unparsed_url = urllib.parse.urlunparse(url)
        if ".." == unparsed_url[0:2]:
            # need to append a '/' to the base url so that the relative
            # path is properly resolved, otherwise we might descend one
            # directory too far
            unparsed_url = urllib.parse.urljoin(self.scmtoolurl+'/', unparsed_url)
        if self.url[0][0:4] == 'git+':
            unparsed_url = 'git+' + unparsed_url

        projecturl = self.url.copy()
        # replace the fragment with the checked out commit id
        cmd = [ 'git', '-C', self.clonedir, 'rev-parse', 'HEAD' ]
        line = self.run_cmd(cmd, cwd=self.outdir, fatal="git rev-parse")
        projecturl[5] = line.rstrip()
        projectscmsync = urllib.parse.urlunparse(projecturl)

        self.processed[package_name] = True
        self.write_package_xml_file(package_name, unparsed_url, projectscmsync)
        self.write_info_file(package_name + ".info", revision)
        self.export_files.add(package_name + ".xml")
        self.export_files.add(package_name + ".info")

    def process_package_subdirectory(self, name: str, subdir: str) -> None:
        # current directory is self.outdir
        if not self._REGEXP.match(name):
            logging.warn("directory name contains invalid char: %s", name)
            return

        # add subdir info file
        info = self.get_subdir_info(subdir + name)
        self.write_info_file(name + ".info", info)
        self.export_files.add(name + ".info")

        # add subdir parameter to url
        url = self.url.copy()
        query = urllib.parse.parse_qs(url[4], keep_blank_values=True)
        query['subdir'] = subdir + name
        url[4] = urllib.parse.urlencode(query, doseq=True)
        if self.revision:
            url[5] = self.revision
        if self.arch:
            query = urllib.parse.parse_qs(url[4], keep_blank_values=True);
            query['arch'] = self.arch
            url[4] = urllib.parse.urlencode(query, doseq=True)
        if self.add_noobsinfo:
            query = urllib.parse.parse_qs(url[4], keep_blank_values=True);
            query['noobsinfo'] = 1
            url[4] = urllib.parse.urlencode(query, doseq=True)

        self.write_package_xml_file(name, urllib.parse.urlunparse(url))
        self.export_files.add(name + ".xml")

    def generate_project_files(self) -> None:
        clonedir = tempfile.mkdtemp(prefix="obs-scm-bridge")
        self.clonedir = clonedir
        self.do_clone(clonedir)
        if self.subdir:
            self.check_subdir(self.subdir)
        if not os.path.isdir(self.outdir):
            os.makedirs(self.outdir)
        if not self.subdir or write_obsinfo_with_subdir:
            self.write_obsinfo()
        os.chdir(self.outdir)
        subdir = self.subdir + '/' if self.subdir else ''
        self.generate_package_xml_files_of_directory(subdir)
        shutil.rmtree(clonedir)
        self.clonedir = None

    def generate_package_xml_files_of_directory(self, subdir) -> None:
        directory = (self.clonedir + '/' + subdir).rstrip('/')
        logging.debug("check %s (subdir=%s)", directory, subdir)

        if os.path.isfile(directory + '/_subdirs'):
            subdir_yml = None
            with open(directory + '/_subdirs') as stream:
                subdir_yml = yaml.safe_load(stream)
            for newsubdir in subdir_yml['subdirs']:
                if not newsubdir:
                    continue
                if (subdir + newsubdir + '/') in self.processed:
                    continue
                self.processed[subdir + newsubdir + '/'] = True
                self.generate_package_xml_files_of_directory(subdir + newsubdir + '/')
            if 'toplevel' not in subdir_yml or subdir_yml['toplevel'] != 'include':
                return

        # find all top level git submodules
        if '.gitmodules' not in self.processed and os.path.isfile(self.clonedir + '/.gitmodules'):
            self.processed['.gitmodules'] = True
            gsmconfig = None
            gsmconfig = configparser.ConfigParser()
            gsmconfig.read(self.clonedir + '/.gitmodules')
            for section in gsmconfig.sections():
                if not 'path' in gsmconfig[section]:
                    logging.warn("path not defined for git submodule " + section)
                    continue
                if self.subdir and not (gsmconfig[section][path] + '/').startswith(self.subdir + '/'):
                    continue
                if not 'url' in gsmconfig[section]:
                    logging.warn("url not defined for git submodule " + section)
                    continue
                self.process_package_submodule(gsmconfig[section])

        # handle plain files and directories
        logging.debug("walk via %s", directory)
        listing = sorted(os.listdir(directory))
        for name in listing:
            if name in self.processed:
                continue                # already handled
            fname = directory + '/' + name
            if os.path.islink(fname):
                target = os.readlink(fname).rstrip('/') # this is no recursive lookup, but is there a usecase?
                if not target or '/' in target or target.startswith('.'):
                    logging.warn("only local links are supported, skipping: " + name)
                    continue
                if target in self.gitsubmodules or os.path.isdir(directory + '/' + target):
                    self.write_package_xml_local_link(target, name)
                    self.processed[name] = True
                else:
                    logging.debug("skipping symlink to a non git submodule %s -> %s", name, target)
                continue

            if name == '.git':
                if self.keep_meta and subdir == '':
                    shutil.move(fname, '.')
                self.processed[name] = True
            elif os.path.isdir(fname):
                if name in self.gitsubmodules:
                    continue            # already handled as git submodule
                if name in self.processed:
                    continue
                if (subdir + name + '/') in self.processed:
                    continue            # already handled in _subdir loop
                self.process_package_subdirectory(name, subdir)
                self.processed[name] = True
            elif name in self.export_files:
                shutil.move(fname, '.')
                self.processed[name] = True

if __name__ == '__main__':
    # Command line entry point: clone the repository given via --url into
    # --outdir.  In project mode only the package list is generated.

    parser = argparse.ArgumentParser(
        description='Open Build Service source service for managing packaging files in git. '
        'This is a special service for OBS git integration.')
    parser.add_argument('--outdir', required=True,
                        help='output directory for modified sources',
                        nargs=1,
                        type=str)
    parser.add_argument('--url',
                        help='REQUIRED: url to git repository',
                        required=True,
                        nargs=1,
                        type=str)
    parser.add_argument('--projectmode',
                        help='just return the package list based on the subdirectories')
    parser.add_argument('--projectscmsync',
                        help='add also reference information of a project git for a package clone')
    parser.add_argument('--debug',
                        help='verbose debug mode')
    args = vars(parser.parse_args())

    url = args['url'][0]
    outdir = args['outdir'][0]
    project_mode = args['projectmode']
    projectscmsync = args['projectscmsync']

    if args['debug']:
        logging.getLogger().setLevel(logging.DEBUG)
        logging.debug("Running in debug mode")

    # workflow
    obsgit = ObsGit(outdir, url, projectscmsync)
    obsgit.add_critical_instance("src.opensuse.org")
    if os.path.isfile(critical_instances_config):
        with open(critical_instances_config) as conf:
            for line in conf:
                instance = line.strip()
                # An empty name is found in every command output and would
                # flag each access error as transient.
                if instance:
                    obsgit.add_critical_instance(instance)
    if os.path.isfile(credentials_config):
        obsgit.setup_credentials(credentials_config)

    if project_mode == 'true' or project_mode == '1':
        obsgit.generate_project_files()
        sys.exit(0)

    obsgit.clone(include_submodules=True, write_service_info=pack_directories)

    if get_assets:
        obsgit.get_assets()
        obsgit.get_debian_origtar()
    if pack_directories:
        obsgit.export_debian_files()
        obsgit.cpio_directories()

