#!/usr/bin/env python3
#
# An OBS Source Service to retrieve and verify Go module sources
# as specified in go.mod and go.sum.
#
# (C) 2019 SUSE LLC
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# See http://www.gnu.org/licenses/gpl-2.0.html for full license text.
#
"""\
OBS Source Service to download, verify and vendor Go module
dependency sources. Using go.mod and go.sum present in a Go
application, call go tools in sequence:

go mod download
go mod verify
go mod vendor

obs-service-go_modules will create a vendor tarball, compressed with
the specified method (default to "gz"), containing the
vendor/ directory populated by go mod vendor.

See README.md for additional documentation.
"""

import logging
import argparse
import re
import os
import shutil
import sys
from pathlib import Path, PosixPath
from subprocess import run, CompletedProcess
from typing import Dict, Optional

import libarchive

APP_NAME = "obs-service-go_modules"
# The module docstring doubles as the argparse help description.
DESCRIPTION = __doc__

# Compression used for the vendor archive when --compression is not given.
DEFAULT_COMPRESSION = "gz"

# Logger shared by every helper in this file. It is bound at import time so
# the helpers also work when this file is imported as a module rather than
# executed as a script; logging.getLogger() returns the same object for the
# same name, so binding it again later is harmless.
log = logging.getLogger(APP_NAME)  # pylint: disable=invalid-name


def _get_output_archive_parameters(compression) -> Dict[str, str]:
    """Translate the requested compression into libarchive writer settings.

    The returned dict has three keys:
    - 'format': the archive format name ("cpio_newc" or "gnutar")
    - 'compression': the filter name, or None for uncompressed output
    - 'ext': the file extension to use for the produced archive

    Logs an error and exits with status 1 when the requested compression
    is not listed in libarchive.ffi.READ_FILTERS.
    """
    # Uncompressed containers: the option value is also the file extension.
    if compression == "obscpio" and "cpio" in libarchive.ffi.READ_FORMATS:
        return {'format': "cpio_newc", 'compression': None, 'ext': compression}

    if compression == "tar" and "tar" in libarchive.ffi.READ_FORMATS:
        return {'format': "gnutar", 'compression': None, 'ext': compression}

    # Anything else is treated as a compressed tarball. Short file
    # extensions are mapped to the filter names used by libarchive; other
    # values are passed through unchanged.
    filter_aliases = {"gz": "gzip", "zst": "zstd"}
    filter_name = filter_aliases.get(compression, compression)

    if filter_name not in libarchive.ffi.READ_FILTERS:
        log.error(
            'The specified compression mode is not supported: "%s"', compression
        )
        sys.exit(1)

    return {
        'format': "gnutar",
        'compression': filter_name,
        'ext': f"tar.{compression}",
    }


def _basename_from_archive_name(archive_name) -> str:
    return re.sub(
        r"^(?P<service_prefix>_service:[^:]+:)?(?P<basename>.*)\.(?P<extension>obscpio|tar\.[^\.]+)$", # pylint: disable=line-too-long
        r"\g<basename>",
        archive_name,
    )


def _source_archive_autodetect() -> str:
    """Find the most likely candidate file that contains go.mod and go.sum.

    For most Go applications this will be app-x.y.z.tar.gz or other supported
    compression. The name of the .spec (preferred) or .dsc file in the current
    working directory is used as the stem for the archive to detect.
    Archive names looked for, in this order:
    - <stem>*.tar.*   (e.g. .tar.gz, .tar.xz, .tar.zst)
    - <stem>*.obscpio (generated by obs_scm service)
    - the same two with a "_service:<name>:" prefix

    When several files match, the one that sorts highest is used.

    Returns the file name (not the full path) of the detected archive.
    Logs an error and exits with status 1 when no spec/dsc file or no
    matching archive is found. A mismatch between the Version: value in the
    spec/dsc file and the archive name only produces a warning.
    """
    log.info("Autodetecting archive since no archive param provided in _service")
    cwd = Path.cwd()
    # highest sorted .spec under cwd or None
    spec = max(cwd.glob("*.spec"), default=None)
    # highest sorted .dsc under cwd or None
    dsc = max(cwd.glob("*.dsc"), default=None)
    pkgfile = spec or dsc
    if not pkgfile:
        log.error("Archive autodetection found no spec or dsc file under %s", cwd)
        sys.exit(1)

    spec_dir = pkgfile.parent  # typically the same as cwd
    spec_stem = pkgfile.stem  # stem is app in app.spec
    patterns = [
        f"{spec_stem}*.tar.*",
        f"{spec_stem}*.obscpio",
        f"_service:*:{spec_stem}*tar.*",
        f"_service:*:{spec_stem}*obscpio",
    ]
    archive = None
    for pattern in patterns:
        log.debug("Trying to find archive name with pattern %s", pattern)
        # highest sorted archive under spec_dir for this pattern
        archive = max(spec_dir.glob(pattern), default=None)
        if archive:
            break

    if not archive:
        log.error("Archive autodetection found no matching archive")
        sys.exit(1)

    log.info("Archive autodetected at %s", archive)
    # Check that app.spec Version: directive value
    # is a substring of detected archive filename
    # Warn if there is disagreement between the versions.
    version = None  # stays None when the file has no Version: line
    version_re = re.compile(r"^Version:\s+([\S]+)$", re.IGNORECASE)
    with pkgfile.open(encoding="utf-8") as file:
        for line in file:
            versionmatch = version_re.match(line)
            if versionmatch:
                # no break: the last Version: line in the file wins
                version = versionmatch.group(1)

    if not version:
        log.warning("Version not found in %s", pkgfile.name)
    elif version not in archive.name:
        log.warning(
            "Version %s in %s does not match %s",
            version,
            pkgfile.name,
            archive.name
        )
    return str(archive.name)  # return string not PosixPath


def _extract(filename, outdir) -> None:
    """Unpack the archive ``filename`` with the working directory set to ``outdir``.

    ``filename`` is resolved against the directory the process is in when
    this function is called. The working directory is switched to ``outdir``
    for the duration of the extraction and always switched back afterwards.
    A libarchive ArchiveError is logged and ends the program with status 1.
    """
    log.info("Extracting %s to %s", filename, outdir)

    original_dir = Path.cwd()

    # Anchor the archive path to the starting directory before leaving it,
    # otherwise a relative name would no longer resolve after the chdir.
    archive_path = original_dir / filename

    log.info("Switching to %s", outdir)
    os.chdir(outdir)

    try:
        libarchive.extract_file(archive_path.as_posix())
    except libarchive.exception.ArchiveError as err:
        log.error(err)
        sys.exit(1)
    finally:
        # runs on success, on failure and on sys.exit() alike
        os.chdir(original_dir)


def _find_file(path, filename) -> Optional[PosixPath]:
    for file in path.rglob(filename):
        return file
    return None


def _cmd_go_mod(cmd, gdir) -> CompletedProcess:
    """Run ``go mod <cmd>`` with ``gdir`` as the working directory.

    stdout and stderr are captured as text. stdout is logged at debug
    level; stderr is logged at error level only when the exit status is
    non-zero. The CompletedProcess is returned so the caller can decide
    how to react to a failure.
    """
    log.info("go mod %s", cmd)
    argv = ["go", "mod", cmd]
    # check=False: a failing command is reported through returncode
    # instead of raising, leaving control flow to the caller.
    completed = run(argv, cwd=gdir, capture_output=True, text=True, check=False)
    log.debug(completed.stdout.strip())
    if completed.returncode != 0:
        log.error(completed.stderr.strip())
    return completed


def _get_gomoddir(outdir) -> PosixPath:
    """Return the directory below ``outdir`` that holds go.mod.

    Logs an error and exits with status 1 when no go.mod is found.
    """
    go_mod_path = _find_file(outdir, "go.mod")
    if not go_mod_path:
        log.error("File go.mod not found under %s", outdir)
        sys.exit(1)

    log.info("Using go.mod found at %s", go_mod_path)
    return go_mod_path.parent


def _create_vendor_archive(args, go_mod_dir, outdir, output_spec) -> None:
    """Run the go mod sequence and pack the resulting vendor/ directory.

    Parameters:
    - args: parsed command line; ``args.goproxy`` and ``args.goprivate``
      are read here
    - go_mod_dir: directory containing go.mod; the go commands run there
    - outdir: directory that receives the vendor archive
    - output_spec: dict with 'format', 'compression' and 'ext' keys

    Side effects: sets GOMODCACHE (and optionally GOPROXY / GONOSUMDB) in
    this process' environment, and writes ``vendor.<ext>`` into ``outdir``.
    Exits with status 1 when a required go mod step fails.
    """
    # go subcommand sequence:
    # - go mod download
    #   (is sensitive to invalid module versions, try and log warn if fails)
    # - go mod vendor
    #   (also downloads but use separate steps for visibility in OBS environment)
    # - go mod verify
    #   (validates checksums)

    vendor_tarname = f"vendor.{output_spec['ext']}"
    # Make the output path absolute now: the working directory is changed
    # below, and a relative outdir would otherwise resolve against
    # go_mod_dir instead of the directory the service was started in.
    vendor_tarfile = Path.cwd() / outdir / vendor_tarname
    vendor_dir = "vendor"

    # NOTE(review): fixed module cache location, overrides any value
    # already present in the environment.
    os.environ['GOMODCACHE'] = '/srv/obs/service/gomodcache'
    if args.goproxy:
        os.environ['GOPROXY'] = args.goproxy
    if args.goprivate:
        # --goprivate is exported as GONOSUMDB (not GOPRIVATE)
        os.environ['GONOSUMDB'] = args.goprivate

    # return value cres is type subprocess.CompletedProcess
    cres = _cmd_go_mod("download", go_mod_dir)
    if cres.returncode:
        if "invalid version" in cres.stderr:
            # tolerated: the following vendor/verify steps decide
            log.warning(
                "go mod download is more sensitive to invalid module versions than go mod vendor"
            )
            log.warning(
                "if go mod vendor and go mod verify complete, vendoring is successful"
            )
        else:
            log.error("go mod download failed")
            sys.exit(1)

    cres = _cmd_go_mod("vendor", go_mod_dir)
    if cres.returncode:
        log.error("go mod vendor failed")
        sys.exit(1)

    cres = _cmd_go_mod("verify", go_mod_dir)
    if cres.returncode:
        log.error("go mod verify failed")
        sys.exit(1)

    log.info("Vendor go.mod dependencies to %s", vendor_tarname)
    cwd = Path.cwd()
    # Archive from inside go_mod_dir so the relative name "vendor" is what
    # gets passed to the archive writer.
    os.chdir(go_mod_dir)
    try:
        with libarchive.file_writer(
                vendor_tarfile.as_posix(), output_spec['format'], output_spec['compression']
        ) as new_archive:
            new_archive.add_files(vendor_dir)
    finally:
        # restore the working directory even if writing the archive fails
        os.chdir(cwd)


def _main() -> None:
    """Parse the command line and run the service end to end.

    Steps: determine the output archive settings, pick the source archive
    (``--archive`` or autodetection), extract it into ``--outdir``, locate
    go.mod, create the vendor archive when the strategy is "vendor", and
    finally remove the extracted source tree from ``--outdir``.

    ``--outdir`` is mandatory; argparse reports a usage error when it is
    missing.
    """
    log.info("Running OBS Source Service: %s", APP_NAME)

    parser = argparse.ArgumentParser(
        description=DESCRIPTION, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument("--strategy", default="vendor")
    parser.add_argument("--archive")
    # required: everything below is written to / extracted into this directory
    parser.add_argument("--outdir", required=True)
    parser.add_argument("--compression", default=DEFAULT_COMPRESSION)
    parser.add_argument("--goproxy")
    parser.add_argument("--goprivate")
    args = parser.parse_args()

    outdir = Path(args.outdir)

    output_spec = _get_output_archive_parameters(args.compression)

    source_archive = args.archive or _source_archive_autodetect()
    log.info("Using archive %s", source_archive)

    # Name of the archive without service prefix and extension; used below
    # as the name of the extracted directory to clean up.
    basename = _basename_from_archive_name(source_archive)
    _extract(source_archive, outdir)

    go_mod_dir = _get_gomoddir(outdir)

    if args.strategy == "vendor":
        _create_vendor_archive(args, go_mod_dir, outdir, output_spec)

    # remove extracted Go application source
    if basename:
        to_remove = outdir.joinpath(basename)
        log.info("Cleaning up working dir %s", to_remove)
        try:
            shutil.rmtree(to_remove)
        except FileNotFoundError:
            # best effort: a missing directory is reported, not fatal
            log.error("Could not remove directory not found %s", to_remove)


if __name__ == "__main__":
    # Script entry point: bind the logger used by all helpers, send
    # INFO-and-above records to the default handler, then run the service.
    log = logging.getLogger(APP_NAME)  # pylint: disable=invalid-name
    logging.basicConfig(level=logging.INFO)
    _main()
