#!/usr/bin/env python3

import logging
import argparse
import re
import libarchive
import os
import shutil
import sys
import tempfile

from pathlib import Path
from subprocess import run

# Name of this source service; also used as the logger name.
app_name = "obs-service-npm_modules"

# Module-level logger, so the functions in this file can log even when the
# module is imported instead of being executed as a script.
log = logging.getLogger(app_name)

# Text shown by argparse in --help; None while the module has no docstring.
description = __doc__

# Default value of --compression (file extension style, e.g. "gz" -> tar.gz).
DEFAULT_COMPRESSION = "gz"
# Default value of --vendorname, the stem of the generated archive file name.
DEFAULT_VENDOR_STEM = "node_modules"


def get_archive_parameters(args):
    """Translate the --compression and --vendorname options into archive settings.

    Returns a dict with the keys:
    - vendorname: args.vendorname, unchanged
    - format: libarchive format name, "cpio_newc" or "gnutar"
    - compression: libarchive filter name, None for obscpio and plain tar
    - level: compression level, None when no filter is used
    - ext: file extension of the resulting archive, without leading dot
    Logs an error and exits with status 1 when the requested compression is
    not listed in libarchive.ffi.READ_FILTERS.
    """
    archive = {
        "vendorname": args.vendorname,
        "compression": None,
        "level": None,
    }
    if args.compression == "obscpio" and "cpio" in libarchive.ffi.READ_FORMATS:
        archive["format"] = "cpio_newc"
        archive["ext"] = "obscpio"
        return archive

    archive["format"] = "gnutar"
    if args.compression == "tar" and "tar" in libarchive.ffi.READ_FORMATS:
        archive["ext"] = "tar"
        return archive

    # File extensions whose libarchive filter name is spelled differently.
    # Anything else is passed through as-is (e.g. "xz").
    filter_names = {"gz": "gzip", "zst": "zstd", "bz2": "bzip2", "lz": "lzip"}
    archive["compression"] = filter_names.get(args.compression, args.compression)
    archive["level"] = 19 if args.compression == "zst" else 9

    if archive["compression"] not in libarchive.ffi.READ_FILTERS:
        log.error(
            f"The specified compression mode is not supported: {args.compression}"
        )
        # sys.exit() instead of exit(): the latter only exists when the
        # site module has been loaded.
        sys.exit(1)

    archive["ext"] = "tar." + (args.compression)
    return archive


def basename_from_archive_name(archive_name):
    """Derive the source directory name from the archive file name.

    A directory is returned unchanged. For a file name, an optional
    "_service:<name>:" prefix and a trailing ".obscpio" or ".tar.<ext>"
    extension are removed; names that do not match are returned unchanged.
    A non-empty result is logged at info level.
    """
    name_pattern = r"^(?P<service_prefix>_service:[^:]+:)?(?P<basename>.*)\.(?P<extension>obscpio|tar\.[^\.]+)$"
    basename = (
        archive_name
        if os.path.isdir(archive_name)
        else re.sub(name_pattern, r"\g<basename>", archive_name)
    )
    if not basename:
        return basename
    log.info(f"Detected basename {basename} from archive name")
    return basename


def basename_from_archive(archive_name):
    """Derive the source directory name from the contents of the archive.

    Collects the names of all directory entries whose path does not contain
    ".git" and returns their common path, logging it at info level.
    Returns None when the archive cannot be read or no common path can be
    computed (for example when there are no directory entries).
    """
    try:
        with libarchive.file_reader(archive_name) as archive:
            dir_names = [
                entry.name
                for entry in archive
                if entry.isdir and ".git" not in entry.pathname
            ]
            try:
                basename = os.path.commonpath(dir_names)
            except ValueError:
                return None
    except libarchive.exception.ArchiveError:
        return None
    log.info(f"Detected basename {basename} from archive")
    return basename


def archive_autodetect():
    """Find the most likely source archive, expected to contain package.json.
    Use the name of the .spec file as the stem for the archive to detect.
    When several .spec files or several archives match, the last one in
    sorted order is used.
    Archive formats supported:
    - .tar.bz2
    - .tar.gz
    - .tar.lz
    - .tar.xz
    - .tar.zst
    - .obscpio
    The same names with a "_service:<name>:" prefix are tried next, and
    finally an entry named exactly like the spec file stem.
    Logs an error and exits with status 1 if no spec file or no archive is
    found. Logs a warning if the Version: value of the spec file is missing
    or is not a substring of the archive file name.
    Returns str with filename of the archive or subdirectory
    """
    log.info("Autodetecting archive since no archive param provided in _service")
    specs = sorted(Path.cwd().glob("*.spec"), reverse=True)
    if not specs:
        log.error(f"No spec file found in {Path.cwd()}")
        sys.exit(1)

    archive = None
    spec = specs[0]
    c_exts = ("gz", "xz", "zst", "lz", "bz2")
    # Ordered by preference: plain archives first, then archives produced by
    # other services, then an entry named like the spec file stem.
    patterns = (
        [f"{spec.stem}*.tar.{c_ext}" for c_ext in c_exts]
        + [f"{spec.stem}*.obscpio"]
        + [f"_service:*:{spec.stem}*tar.{c_ext}" for c_ext in c_exts]
        + [f"_service:*:{spec.stem}*obscpio"]
        + [spec.stem]
    )
    for pattern in patterns:
        log.debug(f"Trying to find archive name with pattern {pattern}")
        matches = sorted(spec.parent.glob(pattern), reverse=True)

        if matches:
            archive = matches[0]
            break

    if not archive:
        log.error("Archive autodetection found no matching archive")
        sys.exit(1)

    log.info(f"Archive autodetected at {archive} ({type(archive)})")

    if archive.name != spec.stem:
        # Check that app.spec Version: directive value
        # is a substring of detected archive filename
        # Warn if there is disagreement between the versions.
        # Start with None so that a spec file without a Version: line
        # triggers the warning below instead of an unbound variable error.
        version = None
        version_re = re.compile(r"^Version:\s+([\S]+)$", re.IGNORECASE)
        with spec.open(encoding="utf-8") as f:
            for line in f:
                versionmatch = version_re.match(line)
                if versionmatch:
                    # the last Version: line of the spec file wins
                    version = versionmatch.group(1)
        if not version:
            log.warning(f"Version not found in {spec.name}")
        elif version not in archive.name:
            log.warning(
                f"Version {version} in {spec.name} does not match {archive.name}"
            )
    return str(archive.name)  # return string not PosixPath


def extract(filename, outdir):
    """Extract the archive filename into the directory outdir.

    The process temporarily changes its working directory to outdir and
    switches back afterwards, also when the extraction fails.
    Logs the error and exits with status 1 on libarchive.exception.ArchiveError.
    """
    log.info(f"Extracting {filename} to {outdir}")

    cwd = os.getcwd()

    # make path absolute so we can switch away from the current working directory
    filename = os.path.join(cwd, filename)

    log.info(f"Switching to {outdir}")
    os.chdir(outdir)

    try:
        # NOTE(review): only EXTRACT_TIME is passed, none of the
        # EXTRACT_SECURE_* flags - confirm the archives are trusted.
        libarchive.extract_file(filename, libarchive.extract.EXTRACT_TIME)
    except libarchive.exception.ArchiveError as archive_error:
        log.error(archive_error)
        sys.exit(1)
    finally:
        # restore the working directory on success and on every error path
        os.chdir(cwd)


def node_install(moddir, npm='npm'):
    """Execute node install subcommand using subprocess.run().
    Run "<npm> install" with moddir as working directory.
    Capture both stderr and stdout as text.
    Log the command as info, and stderr as error if the command failed.
    Return CompletedProcess object to caller for control flow.
    """
    # Local import: the file itself only imports run from subprocess.
    from subprocess import PIPE

    cmd = [npm, "install"]
    log.info(" ".join(cmd))
    # stdout/stderr=PIPE with universal_newlines=True is what
    # capture_output=True, text=True stands for, but it also works on
    # Python versions older than 3.7, so stderr is always captured.
    cp = run(cmd, cwd=moddir, stdout=PIPE, stderr=PIPE, universal_newlines=True)
    if cp.returncode:
        log.error((cp.stderr or "").strip())
    return cp

def sanitize_subdir(basedir, subdir):
    """Return the normalized path subdir if it is located inside basedir.

    Both paths are normalized first, so ".." components are resolved and a
    trailing slash on basedir does not matter. Symbolic links are not
    resolved.
    Logs an error and exits with status 1 if subdir is outside of basedir.
    """
    base = os.path.normpath(basedir)
    ret = os.path.normpath(subdir)
    try:
        inside = os.path.commonpath([base, ret]) == base
    except ValueError:
        # commonpath() refuses to mix absolute and relative paths; such a
        # path cannot be verified to be inside basedir.
        inside = False
    if inside:
        return ret
    log.error(f"Invalid path: {ret} not subdir of {basedir}")
    sys.exit(1)


def main():
    """Vendor the npm dependencies of a source archive into a new archive.

    Steps:
    - parse the command line options
    - use the newest match of --archive, or autodetect the archive
    - extract the archive (or copy the directory) into a temporary directory
    - locate package.json below <basename>[/<subdir>]
    - run "npm install" next to package.json
    - write the resulting node_modules directory to
      <outdir>/<vendorname>.<ext>, using the mtime of package.json for all
      entries when the python libarchive module supports it
    Logs an error and exits with status 1 on any failure.
    """
    log.info(f"Running OBS Source Service: {app_name}")

    parser = argparse.ArgumentParser(
        description=description, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument("--archive")
    parser.add_argument("--outdir")
    parser.add_argument("--compression", default=DEFAULT_COMPRESSION)
    parser.add_argument("--basename")
    parser.add_argument("--npm", default="npm")
    parser.add_argument("--vendorname", default=DEFAULT_VENDOR_STEM)
    parser.add_argument("--subdir")

    args = parser.parse_args()

    npm = args.npm
    subdir = args.subdir

    # Check early, before the expensive npm run, that there is a place to
    # store the result.
    if not args.outdir:
        log.error("No --outdir given, unable to store the vendor archive")
        sys.exit(1)
    # Make the path absolute: the working directory is changed before the
    # archive is written, which would redirect a relative path.
    outdir = os.path.abspath(args.outdir)

    archive_args = get_archive_parameters(args)
    vendor_tarname = f"{archive_args['vendorname']}.{archive_args['ext']}"
    if args.archive:
        archive_matches = sorted(Path.cwd().glob(args.archive), reverse=True)
        if not archive_matches:
            log.error(f"No archive file matches {Path.cwd()}/{args.archive}")
            sys.exit(1)
        archive = str(archive_matches[0])  # use string, not PosixPath
    else:
        archive = archive_autodetect()
    log.info(f"Using archive {archive}")

    with tempfile.TemporaryDirectory() as tempdir:
        if os.path.isdir(archive):
            # Copy to <tempdir>/<last path component>. Joining tempdir with
            # the full path would point back at the source directory itself
            # whenever archive is an absolute path.
            source_dir_name = os.path.basename(os.path.normpath(archive))
            # Preserve symlinks during copy
            shutil.copytree(
                src=archive, dst=os.path.join(tempdir, source_dir_name), symlinks=True
            )
        else:
            extract(archive, tempdir)

        basename = (
            args.basename
            or basename_from_archive(archive)
            or basename_from_archive_name(archive)
        )
        basename = basename.split("/")[-1]

        path_parts = [tempdir, basename]
        if subdir:
            path_parts.append(subdir)
        # sanitize_subdir() exits if the path escapes from tempdir
        package_json = sanitize_subdir(
            tempdir, os.path.join(*path_parts, "package.json")
        )
        if not os.path.exists(package_json):
            log.error(f"File package.json not found under {os.path.join(tempdir, basename)}")
            sys.exit(1)
        package_dir = os.path.dirname(package_json)
        log.info(f"Using package.json found at {package_json}")

        # npm subcommand sequence:
        # - npm install

        # return value cp is type subprocess.CompletedProcess
        cp = node_install(package_dir, npm)
        if cp.returncode:
            log.error("npm install failed")
            sys.exit(1)

        log.info(f"Vendor package.json dependencies to {vendor_tarname}")
        vendor_tarfile = os.path.join(outdir, vendor_tarname)
        vendor_dir = "node_modules"

        mtime = os.path.getmtime(package_json)
        log.debug(f"Set archive files times to {mtime}")

        options = []
        if archive_args["compression"] == "gzip":
            options.append("!timestamp")
        if archive_args["level"]:
            options.append(f"compression-level={archive_args['level']}")

        # Archive from inside the package directory so that the entries are
        # stored relative to it, i.e. below "node_modules".
        cwd = os.getcwd()
        os.chdir(package_dir)
        try:
            # Fail early with a clear message when npm produced nothing to
            # vendor, before an output file is created.
            if not os.path.isdir(vendor_dir):
                log.error(f"Directory {vendor_dir} not found in {package_dir}")
                sys.exit(1)
            with libarchive.file_writer(
                vendor_tarfile,
                archive_args["format"],
                archive_args["compression"],
                options=",".join(options),
            ) as new_archive:
                try:
                    new_archive.add_files(
                        vendor_dir, mtime=mtime, ctime=mtime, atime=mtime
                    )
                except (
                    TypeError
                ):  # If using old libarchive fallback to old non reproducible behavior
                    log.warning(
                        "python libarchive is too old, unable to produce reproducible output"
                    )
                    new_archive.add_files(vendor_dir)
        finally:
            # leave the temporary tree again, also on errors
            os.chdir(cwd)


if __name__ == "__main__":
    # Script entry point: configure logging so that all messages down to
    # DEBUG level are emitted.
    logging.basicConfig(level=logging.DEBUG)
    # Bind the module-global logger that the functions in this file use;
    # this has to happen before main() is called.
    log = logging.getLogger(app_name)
    main()
