#!/usr/bin/python3
#
# Copyright (c) 2018 SUSE Linux GmbH
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of version 3 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, contact SUSE LLC.
#
# To contact SUSE about this file by physical or electronic mail,
# you may find current contact information at www.suse.com

"""
Generates a basic default XML configuration for docserv from
Git repositories.
"""

import os
import re
import sys
import pygit2
import argparse
from lxml import etree
from contextlib import contextmanager

# Placeholder text written into the generated XML wherever this script has no
# real value to offer (presumably to be replaced by hand afterwards).
FIXME = "FIXME"
# Default value for the ``schemaversion`` attribute of the generated
# <product> root element (see generate_config).
SCHEMAVERSION = "5.0"

def get_origin_url(path):
    """Return the URL of the ``origin`` remote, normalized to HTTPS form.

    An scp-like SSH address such as ``git@github.com:openSUSE/docserv.git``
    is rewritten to ``https://github.com/openSUSE/docserv.git``. A URL that
    does not have this ``user@host:path`` shape is returned unchanged.

    :param path: The path to local repository
    :type path: str

    :return: the URL address
    :rtype: str
    """
    repo = pygit2.Repository(path)
    url = repo.remotes["origin"].url
    # example: ('git@')('github.com')(':')('openSUSE/docserv.git')
    # Host names may contain digits and hyphens as well as letters and dots.
    regex = r'(^[a-zA-Z]+@)([a-zA-Z0-9.-]+)([:])(.*)'
    match = re.match(regex, url)
    if match:
        # Rebuild the URL from the captured host and path. Using
        # str.replace() here would also rewrite any further ':' or
        # 'user@' occurrence inside the repository path.
        url = "https://{}/{}".format(match.group(2), match.group(4))
    return url

def get_version(name):
    """Return the version part of a product name.

    The leading alphabetic product name is dropped. If the version number
    is followed by a suffix that starts with a letter or digit (such as a
    service pack), a single space is inserted between the two. Names
    without a version number, or names that do not start with a letter or
    underscore, are returned unchanged.

    Examples:
    >>> get_version("SLE12SP4")
    '12 SP4'
    >>> get_version("SLE15")
    '15'
    >>> get_version("develop")
    'develop'

    :param name: string with a product name
    :type name: str

    :return: the version of a product
    :rtype: str
    """
    # example: ('SLE')('12')('SP4')
    version_regex = r'(^[a-zA-Z_]+)([0-9.]+)?([a-zA-Z0-9.]+)?'
    match = re.match(version_regex, name)
    if match is None or not match.group(2):
        # Nothing that looks like a version number: keep the name as is
        # rather than returning None.
        return name
    number = match.group(2)
    # Everything after the number, including text the regex did not capture.
    remainder = name[match.end(2):]
    if match.group(3):
        # Insert the space exactly once, directly after the number. A
        # str.replace() would also hit repetitions such as 'SLE12SP12'.
        return number + " " + remainder
    return number + remainder

def prettify(root):
    """Serialize an XML element into an indented text string.

    :param root: The XML tree
    :type root: :class:`lxml.etree._Element`

    :return: the indented string
    :rtype: str
    """
    # encoding="unicode" makes the serializer hand back text, not bytes.
    serialized = etree.tostring(root, encoding="unicode", pretty_print=True)
    return serialized

def _strip_maintenance_prefix(branch):
    """Return *branch* without a leading ``maintenance/`` or ``maint/``."""
    for prefix in ("maintenance/", "maint/"):
        if branch.startswith(prefix):
            return branch[len(prefix):]
    return branch

def generate_config(args, git_url, branches, version=SCHEMAVERSION):
    """Generate the configuration file.

    Builds a ``<product>`` XML tree with one ``<docset>`` per branch
    (``trans/`` branches are skipped) and writes it to
    ``<productid>.xml`` in the current working directory.

    :param args: The namespace containing the scripts arguments; the
               attributes ``productid``, ``contact``, ``langs`` and
               ``default_lang`` are read
    :param str git_url: The repo origin url

    :param branches: The branch names, the DC files found on each branch
               and the subdeliverables of each DC file, for example:
               {'develop': {'DC-FOO-all': ['book.one', 'art.two'],
                            'DC-FOO-install-quick': []}, ... }
    :type branches: dict

    :param str version: the schema version to use
    """
    root = etree.Element("product", productid=args.productid, schemaversion=version)
    etree.SubElement(root, "name").text = FIXME
    etree.SubElement(root, "acronym").text = FIXME
    maintainers = etree.SubElement(root, "maintainers")
    etree.SubElement(maintainers, "contact").text = args.contact if args.contact else FIXME

    for language in args.langs:
        is_default_lang = "1" if language == args.default_lang else "0"
        description = etree.SubElement(root, "desc", default=is_default_lang, lang=language)
        etree.SubElement(description, "p").text = FIXME

    for branch, dc_files in branches.items():
        if branch.startswith("trans/"):
            continue
        # Strip both maintenance prefixes, as the translation branch lookup
        # below does, so the set ID never contains the prefix.
        setid = _strip_maintenance_prefix(branch)
        product_version = get_version(setid)
        docset = etree.SubElement(root, "docset", setid=setid, lifecycle=FIXME)

        etree.SubElement(docset, "version").text = product_version
        builddocs = etree.SubElement(docset, "builddocs")
        # Optionally <git>, look at product/git
        etree.SubElement(builddocs, "git", remote=git_url)

        # HTML for the individual DC files is enabled only when the branch
        # has no DC file with "-all" in its name.
        html_without_all = "0" if any("-all" in dc_file for dc_file in dc_files) else "1"

        for language in args.langs:
            if language == args.default_lang:
                lang = etree.SubElement(builddocs, "language",
                                        default="1", lang=language)
                etree.SubElement(lang, "branch").text = branch
                for dc_file in sorted(dc_files):
                    deliverable = etree.SubElement(lang, "deliverable")
                    etree.SubElement(deliverable, "dc").text = dc_file
                    dc_all_file = dc_file.endswith("-all")
                    attrs = {'pdf': "0" if dc_all_file else "1",
                             'epub': "0" if dc_all_file else "1",
                             'html': "1" if dc_all_file else html_without_all,
                             'single-html': "0" if dc_all_file else "1",
                             }
                    etree.SubElement(deliverable, "format", **attrs)
                    if dc_all_file:
                        for sub in sorted(dc_files[dc_file]):
                            etree.SubElement(deliverable, "subdeliverable").text = sub
            else:
                lang = etree.SubElement(builddocs, "language", lang=language,
                                        attrib={'translation-type': "full"})
                # Use a separate name so that the outer loop variable keeps
                # pointing at the source branch for the remaining languages.
                lang_branch = branch
                if branch.startswith(("maintenance/", "maint/")):
                    trans_branch = "trans/%s" % _strip_maintenance_prefix(branch)
                    if trans_branch in branches:
                        lang_branch = trans_branch
                etree.SubElement(lang, "branch").text = lang_branch
                etree.SubElement(lang, "subdir").text = FIXME
        # TODO: add external/links as comments
        # externallinks = etree.SubElement(docset, "external")
        # etree.SubElement(externallinks, "link", href=FIXME).text = "SOME DESCRIPTION"

    configfile = os.path.join(os.getcwd(), "{}.xml".format(args.productid))
    print("Find the config file at {!r}".format(configfile))

    # The serialized text carries no XML declaration, so it has to be stored
    # as UTF-8 (the XML default) regardless of the locale's encoding.
    with open(configfile, 'w', encoding="utf-8") as fh:
        fh.write(prettify(root))
    print("Done.")

@contextmanager
def switchbranch(repo):
    """Context manager that hands out a branch-checkout generator and
    checks out the initially active branch again on exit.

    The branch that HEAD points to is looked up when the context is
    entered. The value bound by ``with ... as`` is the inner function
    :func:`checkout_branch`. When the ``with`` block ends, normally or
    through an exception, the saved reference is checked out again.

    :param repo: the repo to the Git repository
    :type repo: :class:`pygit2.Repository`
    :return: function, see: checkout_branch
    :rtype: function reference
    """
    def checkout_branch(branches):
        """Generator to iterate over branches, which checks out the current
            branch automatically

        Each name is resolved as ``origin/<name>`` among the remote
        branches of ``repo``; a name without such a remote branch makes
        the lookup fail.

        :param list branches: the list of branches as strings
        :yield: current branch as string
        """
        for branch in branches:
            # Look the name up below the "origin" remote, then resolve it
            # again through lookup_branch() to get the reference to check out.
            remote_branch = repo.branches.remote["origin/" + branch]
            ref_branch = repo.lookup_branch(remote_branch.branch_name, pygit2.GIT_BRANCH_REMOTE)
            repo.checkout(ref_branch)
            yield branch
    # save initial branch
    # NOTE(review): presumably HEAD points at a branch here; with a detached
    # HEAD the lookup below may not find a reference -- confirm.
    initial_branch = repo.head.shorthand
    initial_ref = repo.lookup_branch(initial_branch, pygit2.GIT_BRANCH_ALL)
    try:
        yield checkout_branch
    finally:
        # change back to initial branch
        repo.checkout(initial_ref)

def get_deliverables(path, filter_dc=None):
    """Gather the DC files and their subdeliverables for every branch.

    Each branch reported by :func:`get_git_branches` is checked out in
    turn. The DC files are read from the ``en`` entry of the repository
    if the top-level listing contains one, otherwise from the repository
    root. Only DC files ending in ``-all`` get their subdeliverables
    collected.

    :param str path: the path to the Git repository
    :param filter_dc: filter
    :return: A dictionary with all branches, DC-files and subdeliverables
        { 'branch1': {
            'DC-blub-all' : ['book.bla', 'book.blu'],
            'DC-blub-bla' : [],
            'DC-blub-blu' : [], ... },
          'branch2': {
            'DC-blubs-all' : ['book.ble', 'book.bli'],
            'DC-blubs-ble' : [],
            'DC-blubs-bli' : [], ... }
        }
    :rtype: dict
    """
    repo = pygit2.Repository(path)
    branches = get_git_branches(path)
    with switchbranch(repo) as checkout_branch:
        for current in checkout_branch(branches):
            # some branches keep their DC files below 'en'
            if "en" in os.listdir(path):
                dc_dir = path + "/en"
            else:
                dc_dir = path
            found = collect_dc_files(dc_dir, filter_dc)
            branches[current] = found
            for dc_name in found:
                # drop this guard to inspect the other DC files as well
                if not dc_name.endswith("-all"):
                    continue
                main_file = get_main_rootid(dc_dir, dc_name)[0]
                found[dc_name] = collect_subdeliverables(dc_dir, found, main_file)
    return branches

def get_main_rootid(path, dc_file):
    """Read the MAIN and ROOTID assignments from a DC file.

    Only lines that start with an upper-case ``NAME = value`` assignment
    are considered; the value may be wrapped in single or double quotes.
    If a name is assigned more than once, the last assignment wins. A
    name that is never assigned yields an empty string.

    :param str path: the path to the Git repository
    :param str dc_file: the DC-file to look in
    :return: a tuple with two strings: MAIN and ROOTID values
    :rtype: tuple(str, str)
    """
    pattern = r'(?P<name>[A-Z]+)[ \t]*=[ \t]*(?:"|\')?(?P<value>[\w\.\-]+)(?:"|\')?'
    found = {"MAIN": "", "ROOTID": ""}
    with open(os.path.join(path, dc_file), "r") as handle:
        for line in handle:
            hit = re.match(pattern, line)
            if hit is None:
                continue
            key = hit.group('name')
            if key in found:
                found[key] = hit.group('value')
    return found["MAIN"], found["ROOTID"]

def collect_subdeliverables(path, dc_files, main_all):
    """Gather the ROOTIDs of all DC files that share a given MAIN file.

    DC files whose name ends in ``-all`` are skipped. Each ROOTID is
    reported once, in the order in which it is first encountered.

    :param str path: the path to the Git repository
    :param list dc_files: the list with DC-files from current branch
    :param str main_all: MAIN value of current DC-all-file
    :return: a list with all subdeliverables found after looking in all DC-files
    :rtype: list
    """
    rootids = []
    for candidate in dc_files:
        if not candidate.endswith("-all"):
            main_file, root_id = get_main_rootid(path, candidate)
            same_main = main_file == main_all
            if same_main and root_id and root_id not in rootids:
                rootids.append(root_id)
    return rootids

def collect_dc_files(path, filter_dc):
    """Find the DC files in a directory.

    A directory entry qualifies if it is a regular file, its name starts
    with ``DC-`` and, when a filter is given, the filter string occurs
    somewhere in the name.

    :param str path: the path to the Git repository
    :param str filter_dc: filter
    :return: a dictionary with all DC files found in this branch:
        {
            'DC-blub-all' : [],
            'DC-blub-bla' : [],
            'DC-blub-blu' : [], ... }
    :rtype: dict
    """
    def wanted(entry):
        """Tell whether a directory entry is a DC file passing the filter."""
        if not entry.startswith("DC-"):
            return False
        if not os.path.isfile(os.path.join(path, entry)):
            return False
        return filter_dc is None or filter_dc in entry

    return {entry: [] for entry in os.listdir(path) if wanted(entry)}

def get_git_branches(path):
    """Return the relevant branches of the ``origin`` remote.

    Only remote branches below ``origin/`` are considered, and the
    ``origin/`` prefix is removed from the returned names. A branch is
    kept if it is called ``develop``, ``master`` or ``main``, or if its
    name starts with ``trans/``, ``maint/`` or ``maintenance/``.

    :param str path: the path to the Git repository
    :return: dictionary of all branches for example:
            {
                'develop': {},
                'master':  {}, ... }
    :rtype: dict
    """
    repo = pygit2.Repository(path)
    remote_prefix = "origin/"
    main_branches = ("develop", "master", "main")
    branch_prefixes = ("trans/", "maint/", "maintenance/")
    branches = {}
    for remote_branch in sorted(repo.listall_branches(pygit2.GIT_BRANCH_REMOTE)):
        # Branches of other remotes cannot be checked out by switchbranch(),
        # which always looks below "origin/".
        if not remote_branch.startswith(remote_prefix):
            continue
        # Cut off the leading prefix only; str.replace() would also remove
        # an "origin/" that appears inside the branch name.
        branch = remote_branch[len(remote_prefix):]
        if branch in main_branches or branch.startswith(branch_prefixes):
            branches[branch] = {}
    return branches

def check_langs(langs, message, attempts = 3):
    """Validate a language list, prompting for it while it is missing or invalid.

    The input is lower-cased, underscores become hyphens, and the string
    is split on commas and spaces. If any code is unknown, or no code is
    left, one attempt is used up and the user is prompted again. Once all
    attempts are spent, the script exits with status 1.

    :param str langs: Language(s) to check
    :param str message: Message to print
    :param int attempts: Allowed number of attempts to specify valid languages
    :return: List of checked and formatted language(s)
    :rtype: list
    """
    known = ("en-us de-de fr-fr pt-br ja-jp zh-cn es-es "
             "it-it ko-kr hu-hu zh-tw cs-cz ar-ar pl-pl "
             "ru-ru").split(" ")
    remaining = attempts
    while remaining:
        if not langs:
            langs = input(message)
        # normalize separators and case before splitting
        normalized = langs.replace("_", "-").strip().lower()
        langs = [code.strip() for code in re.split(r'[, ]+', normalized) if code != '']
        # codes that are not part of the list above
        unknown = set(langs).difference(known)
        if langs and not unknown:
            return langs
        remaining -= 1
        # discard the rejected input so that the next round prompts again
        langs = ""
        if unknown:
            print("Unknown language {} found.".format(unknown))
        else:
            print("No valid input language found.")
        print("Expected one or more of the following languages:\n"
              "{}".format(", ".join(known)))
    print("Exiting. You entered invalid input too often.")
    sys.exit(1)

def check_productid(productid, message, attempts = 3):
    """Validate a product ID, prompting for it while it is missing or invalid.

    A product ID is valid if it consists solely of alphanumeric characters
    and the characters ``-_.+``. Every rejected value uses up one attempt;
    once all attempts are spent, the script exits with status 1.

    :param productid: The product ID supplied from the command line
    :param message: Message to print
    :attempts: Allowed number of attempts to specify a valid product ID
    :return: Validated product ID
    :rtype: string
    """
    productid_regex = r'(?P<productid>[-_.+a-zA-Z0-9]+)'
    message_expected = ("Only alphanumeric characters and the special "
                        "characters -_.+ are allowed.")
    tries_left = attempts
    while tries_left:
        if not productid:
            productid = input(message)
        # the pattern has to cover the whole string, not just a prefix
        if re.fullmatch(productid_regex, productid):
            return productid
        tries_left -= 1
        print("Invalid Product ID '{}' given".format(productid))
        print(message_expected)
        # forget the rejected value so that the next round prompts again
        productid = ""
    print("Exiting. You entered invalid input too often.")
    sys.exit(1)

def check_args(args):
    """Complete the parsed arguments, prompting for every value that is missing.

    The namespace is updated in place. After validation the default
    language is always the first entry of ``args.langs``.

    :param args: The namespace containing the scripts arguments
    :type args: :class:`argparse.Namespace`
    """
    # repository path: ask until an existing path is given
    while not (args.path and os.path.exists(args.path)):
        args.path = input("- Enter the path to the git repository clone: ")

    # translation languages
    args.langs = check_langs(
        args.langs,
        "- Enter translation languages to check for (comma-separated, format: 'language-country'): ",
    )

    # default language: only the first code given is used
    default_candidates = check_langs(
        args.default_lang,
        "- Enter the language code of the default language (format: 'language-country'): ",
    )
    args.default_lang = default_candidates[0]
    # move the default language to the front of the list
    if args.default_lang in args.langs:
        args.langs.remove(args.default_lang)
    args.langs.insert(0, args.default_lang)

    # DC file filter: asked once, an empty answer is accepted
    if not args.filter:
        args.filter = input("- Enter a filter string for DC files (can be left empty): ")

    # maintainer contact: ask until something is entered
    while not args.contact:
        args.contact = input("- Enter the e-mail address of the primary documentation maintainer: ")

    # product ID
    args.productid = check_productid(
        args.productid,
        "- Enter a Product ID (also serves as the output file name): ",
    )

def parse_cli(cliargs=None):
    """Parse the command line and complete missing values interactively.

    The arguments are parsed with :class:`argparse.ArgumentParser` and
    then passed through :func:`check_args` before being returned.

    :param list cliargs: Arguments to parse or None (=use sys.argv)
    :return: parsed CLI result
    :rtype: :class:`argparse.Namespace`
    """
    # help text shared by both language options
    lang_format_hint = "'language-country'. Uppercase and lowercase are irrelevant."

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "-l", "--languages",
        dest="langs",
        help="One or more translation languages in the format " + lang_format_hint,
    )
    parser.add_argument(
        "-d", "--default", "--default-language",
        dest="default_lang",
        help="Default language in the format " + lang_format_hint,
    )
    parser.add_argument(
        "-f", "--filter",
        help="File name prefix of the DC files that you want to configure",
    )
    parser.add_argument(
        "-m", "--contact",
        help="E-mail address of the primary documentation maintainer",
    )
    parser.add_argument(
        "-p", "--product-id",
        dest="productid",
        help="Product ID that will also be used as name for "
             "this configuration file (Example: 'sles')",
    )
    parser.add_argument(
        "path",
        nargs='?',
        metavar="PATH",
        help="Path to the local repository clone",
    )

    parsed = parser.parse_args(args=cliargs)
    check_args(parsed)
    return parsed

if __name__ == "__main__":
    # Parse the command line; parse_cli() also runs check_args(), which
    # prompts for values that were not given.
    args = parse_cli()

    # Collect the DC files per branch, determine the origin URL, and write
    # the XML configuration file.
    branches = get_deliverables(args.path, filter_dc=args.filter)
    git_url = get_origin_url(args.path)
    generate_config(args, git_url, branches)
    sys.exit(0)
