#!/usr/bin/python3

#   Copyright (c) MediaTek USA Inc., 2020-2024
#
#   This program is free software;  you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or (at
#   your option) any later version.
#
#   This program is distributed in the hope that it will be useful, but
#   WITHOUT ANY WARRANTY;  without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#   General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program;  if not, see
#   <http://www.gnu.org/licenses/>.
#
#
# This script traverses the Python coverage data in one or more coverage
# data files (generated by the Coverage.py module) and translates it into
# LCOV .info format.
#
#   py2lcov [--output mydata.info] [--test-name name] [options] coverage.dat+
#
# See 'py2lcov --help' for more usage information
#
# See https://coverage.readthedocs.io for directions on how to use Coverage.py
# to generate Python coverage data.
#
#
# arguably, should do this in Perl so we can use the lcovutil module
#
# @todo figure out/enhance Coverage.py to characterize branch expressions
# @todo perhaps this should be integrated into the Coverage.py module itself
#

import os
import os.path
import sys
import re
import argparse
import xml.etree.ElementTree as ET
import fnmatch
import subprocess
import copy
import base64
import hashlib

def line_hash(line: str) -> str:
    """Return the checksum of one source line as stored in a 'DA:' record.

    The line is encoded as UTF-8, digested with MD5, and the digest is
    base64-encoded with the trailing '=' padding removed.
    """
    digest = hashlib.md5(line.encode("utf-8")).digest()
    encoded = base64.b64encode(digest)
    return encoded.decode("ascii").rstrip("=")


class ProcessFile:
    """Translate Coverage.py data files (or their XML reports) to LCOV .info.

    All of the work is driven by the constructor: it opens the output file,
    writes the 'TN:' record, converts every input that is not already an
    '.xml' file by running 'coverage xml', and then writes one
    'SF: ... end_of_record' section per source file found in the XML.
    """

    # Leading whitespace, optionally followed by 'def NAME' or 'class NAME'.
    # Only the name is collected - the parameters are ignored.
    # At least one blank is required after the keyword so that identifiers
    # such as 'default' or 'classes' are not mistaken for definitions, and
    # the name stops at '(' or ':' so that 'class Foo:' is named 'Foo'.
    _PARSE_LINE = re.compile(r'(\s*)((def|class)\s+([^\(: \t]+))?')

    # 'condition-coverage' attribute of a branch line, e.g. "50% (1/2)"
    _CONDITION_COVERAGE = re.compile(r'\d+\% \((\d+)/(\d+)\)')

    def __init__(self, scriptArgs):
        """Process every input named by scriptArgs and write the .info file.

        scriptArgs is the argparse namespace built by main().  When no input
        is given, the COVERAGE_FILE environment variable is used instead;
        the script exits with status 1 if that is not set either.
        """
        self._args = scriptArgs

        if scriptArgs.input:
            if not scriptArgs.keepGoing:
                print("--input is deprecated - please use 'py2lcov ... %s" % (scriptArgs.input))
            scriptArgs.inputs.append(scriptArgs.input)

        self._excludePatterns = scriptArgs.excludePatterns.split(',') if scriptArgs.excludePatterns else None
        self._versionScript = scriptArgs.version.split(',') if scriptArgs.version else None

        if not scriptArgs.inputs:
            # no input file - see if COVERAGE_FILE environment variable is set
            try:
                scriptArgs.inputs.append(os.environ['COVERAGE_FILE'])
            except KeyError:
                print("Error:  no input files")
                sys.exit(1)
            print("reading input from default COVERAGE_FILE '%s'" % scriptArgs.inputs[0])

        # the 'with' block closes the output even when we leave via sys.exit()
        with open(scriptArgs.output, 'w') as outf:
            self._outf = outf
            outf.write("TN:%s\n" % scriptArgs.testName)
            for f in scriptArgs.inputs:
                self._process_input(f)

    def _process_input(self, path):
        """Process one input: an '.xml' report, or a Coverage.py data file."""
        base, ext = os.path.splitext(path)
        if ext == '.xml':
            self.process_xml_file(path)
            return

        # assume that anything not ending in .xml is a Coverage.py data file:
        # convert it to a temporary XML file whose name does not exist yet
        xml = base + '.xml'
        suffix = 1
        while os.path.exists(xml):
            xml = base + '.xml%d' % suffix
            suffix += 1

        # Pass the file names as an argument list and through the environment
        # rather than through a shell string, so that names containing blanks
        # or shell metacharacters are handled correctly.
        env = dict(os.environ, COVERAGE_FILE=path)
        try:
            subprocess.run(['coverage', 'xml', '-o', xml], env=env,
                           check=True, stderr=subprocess.STDOUT)
        except (subprocess.CalledProcessError, OSError) as err:
            print("Error:  error during XML conversion of %s: %s" % (
                path, str(err)))
            if not self._args.keepGoing:
                sys.exit(1)
            return

        try:
            self.process_xml_file(xml)
        finally:
            # do not leave the temporary XML behind, even on error exit
            os.unlink(xml)

    def _tab_width(self):
        """Return the width used for a tab character when measuring indent.

        The option has been stored under both 'tabWidth' and 'tabwidth';
        accept either spelling and fall back to 8 when neither is present.
        """
        for attr in ('tabWidth', 'tabwidth'):
            width = getattr(self._args, attr, None)
            if width is not None:
                return width
        return 8

    def _indent_width(self, indent):
        """Return the column width of the leading-whitespace string indent."""
        tab = self._tab_width()
        width = 0
        for c in indent:
            # a tab counts as the tab width, any other whitespace as one column
            width += tab if c == '\t' else 1
        return width

    @staticmethod
    def _build_function(functions, objStack, currentObj, lastLine):
        """Close currentObj at lastLine; record it in functions if it is a 'def'.

        objStack holds the enclosing scopes (outermost first); their names
        form the prefix of the function name, with '::' after a class and
        '.' after a function.
        """
        if not (currentObj and lastLine):
            return
        currentObj['end'] = lastLine  # last line
        if currentObj['type'] != 'def':
            return
        prefix = ''
        sep = ''
        for e in objStack:
            prefix += sep + e['name']
            sep = "::" if e['type'] == 'class' else '.'
        functions.append({'name'  : prefix + sep + currentObj['name'],
                          'start' : currentObj['start'],
                          'end'   : currentObj['end'],
                          # function might be unreachable dead code
                          'hit'   : currentObj.get('hit', 0)})

    def process_xml_file(self, xml_file):
        """Write the LCOV records for every file listed in one XML report.

        The first child of the XML root must be 'sources' and the second
        'packages'.  At most one source path is supported.  Errors are
        reported on stdout; the script exits with status 1 unless
        --keep-going was given, in which case the report is skipped.
        """
        verbose = self._args.verbose
        source_paths = []

        try:
            root = ET.parse(xml_file).getroot()
            if root[0].tag != 'sources':
                raise ValueError("no 'sources'")
            for source in root[0]:
                text = source.text or ''  # an empty element has no text
                source_paths.append(text)
                if verbose:
                    print("source: " + text)
            if root[1].tag != 'packages':
                raise ValueError("no 'packages'")
            if verbose:
                print("packages: " + str(root[1].attrib))
        except Exception as err:
            print("Error: parse xml fail: %s" % (str(err)))
            if not self._args.keepGoing:
                sys.exit(1)
            return

        if len(source_paths) > 1:
            print("Error: %s: Only support single source path" %(xml_file))
            if not self._args.keepGoing:
                sys.exit(1)
            return

        source_path = source_paths[0] if source_paths else ''

        for package in root[1]:
            # name="." means current directory
            # name=".folder1.folder2" means external module or directory
            # name="abc" means internal module or directory
            pkgName = package.attrib['name']
            isExternal = pkgName.startswith('.') and pkgName != '.'
            for classes in package:
                for fileNode in classes:
                    filename = fileNode.attrib['filename']
                    if self._excludePatterns and any(
                            fnmatch.fnmatchcase(filename, ef)
                            for ef in self._excludePatterns):
                        if verbose:
                            print("%s is excluded" % filename)
                        continue
                    name = filename if isExternal else os.path.join(source_path, filename)
                    self._outf.write("SF:%s\n" % name)
                    if self._versionScript:
                        # version script plus its arguments, then the file
                        cmd = self._versionScript + [name]
                        try:
                            version = subprocess.check_output(cmd)
                            self._outf.write("VER:%s\n" % version.strip().decode('UTF-8'))
                        except (subprocess.CalledProcessError, OSError,
                                UnicodeError) as err:
                            print("Error: no version for %s: %s" %(
                                name, str(err)))
                            if not self._args.keepGoing:
                                sys.exit(-1)

                    self.process_file(fileNode, name)
                    self._outf.write("end_of_record\n")

    def process_file(self, fileNode, filename):
        """Write the BRDA/FN/FNDA/DA and summary records for one source file.

        fileNode is the XML element of the file; each of its 'lines'
        children is processed.  filename is the path of the source file,
        which is read when functions are derived or checksums are requested.
        """
        args = self._args
        write = self._outf.write

        lines = None
        if args.deriveFunctions or args.checksum:
            try:
                with open(filename, 'r') as f:
                    lines = f.read().split('\n')
            except (OSError, UnicodeError):
                print("cannot open %s - some features disabled" % (filename))
                if not args.keepGoing:
                    sys.exit(1)
        # the source text is also read for checksums, so derive functions
        # only when that was actually requested
        scanSource = bool(args.deriveFunctions) and lines is not None

        for node in fileNode:
            if node.tag != 'lines':
                continue
            # Keep track of current function/class scope - which we use to find
            #   the first and last executable lines in each function,
            # Want to keep track of the function end line - so we can use lcov
            # function exclusions.
            #   currentObj:
            #    type:   'class' or 'def'
            #    name:   as appears in regexp
            #    indent: indent count of 'def' or 'class' statement
            #    start:  line of item (where 'def' or 'class' is found
            #    end:    last line of function
            #    hit:    whether first line of function is hit or not
            currentObj = None
            objStack = []    # enclosing scopes of currentObj, outermost first
            functions = []   # list of {name, start, end, hit}
            prevLine = None  # last line number seen in the source scan
            branchFound = 0
            branchHit = 0
            # need to save the statement data and print later because Coverage.py
            # has an odd interpretation of the execution status of the function
            # decl line.
            #   - C/C++ mark it executed if the line is entered - so it
            #     is an analog of function coverage.
            #   - Coverage.py appears to mark it executed when the containing
            #     scope is executed (i.e., when a lazy interpret might compile
            #     the function).
            # However, we want to mark the decl executed only if the function
            # is executed - and we decide that the function is executed if first
            # line in the function is hit.
            #   - as a result, after seeing all the functions, we want to go back
            #     and mark the function decl line as 'not hit' if we decided that
            #     the function itself is not executed.
            lineData = {}
            for line in node:
                lineNo = int(line.attrib['number'])
                hit = int(line.attrib["hits"])
                lineData[lineNo] = hit

                if scanSource:
                    if 1 <= lineNo <= len(lines):
                        # always matches: both regexp groups may be empty
                        m = self._PARSE_LINE.match(lines[lineNo - 1])
                        indent = self._indent_width(m.group(1))
                        while currentObj and indent <= currentObj['indent']:
                            # lower indent - so this is a new object
                            self._build_function(functions, objStack,
                                                 currentObj, prevLine)
                            currentObj = objStack.pop() if objStack else None

                        if m.group(2):
                            if currentObj:
                                objStack.append(currentObj)
                            currentObj = {'type':   m.group(3),
                                          'name':   m.group(4).rstrip(),
                                          'indent': indent,
                                          'start':  lineNo,
                            }
                        elif currentObj and 'hit' not in currentObj:
                            # just a line - it is the first executable
                            #   line in the current object:
                            currentObj['hit'] = hit
                            # mark that function decl line is not
                            #  hit if the function is not hit
                            if hit == 0:
                                lineData[currentObj['start']] = 0

                        prevLine = lineNo
                    else:
                        print('"%s":%d: Error: out of range: file contains %d lines' % (
                            filename, lineNo, len(lines)))
                        if not args.keepGoing:
                            sys.exit(1)

                # xmlreport.py only ever writes branch="true"; anything else
                # is treated as 'not a branch line'
                if line.attrib.get("branch") == "true":
                    m = self._CONDITION_COVERAGE.match(
                        line.attrib.get('condition-coverage', ''))
                    if m is None:
                        print('"%s":%d: Error: cannot parse branch coverage' % (
                            filename, lineNo))
                        if not args.keepGoing:
                            sys.exit(1)
                    else:
                        taken = int(m.group(1))
                        total = int(m.group(2))
                        # no information of which clause is taken or not
                        # set taken conditions start from 0 and followed by
                        #  non-taken conditions
                        for cond in range(total):
                            isTaken = 1 if cond < taken else 0
                            write("BRDA:%d,0,%d,%d\n" % (lineNo, cond, isTaken))
                            branchFound += 1
                            branchHit += isTaken

            # and build all the pending functions
            #  these were still open when we hit the end of file - e.g., because
            #  they are last elements in some package file and there are no
            #  no executable lines after the function decl.
            # There may be more than one function in the stack, if the last
            # object is nested.
            while currentObj:
                self._build_function(functions, objStack, currentObj, prevLine)
                currentObj = objStack.pop() if objStack else None

            # print the LCOV function data
            for f in functions:
                write("FN:%(start)d,%(end)d,%(name)s\nFNDA:%(hit)d,%(name)s\n" % f)

            # print the LCOV line data.
            for lineNo in sorted(lineData):
                checksum = ''
                if args.checksum:
                    # 'lines' is 0-based while line numbers start at 1
                    if lines is not None and 1 <= lineNo <= len(lines):
                        checksum = ',' + line_hash(lines[lineNo - 1])
                    else:
                        print('"%s":%d: unable to compute checksum for missing line' % (filename, lineNo))
                        if not args.keepGoing:
                            raise IndexError(
                                '"%s": no source line %d' % (filename, lineNo))

                write("DA:%d,%d%s\n" % (lineNo, lineData[lineNo], checksum))

            # print the LCOV totals - not used by lcov, but maybe somebody does
            # The line totals are taken from the final line data, so that
            # they agree with the 'DA:' records written above.
            totals = (
                ('LF', 'LH', len(lineData),
                 sum(1 for h in lineData.values() if h)),
                ('BRF', 'BRH', branchFound, branchHit),
                ('FNF', 'FNH', len(functions),
                 sum(1 for f in functions if f['hit'])),
            )
            for foundTag, hitTag, found, hitCount in totals:
                if found == 0:
                    continue
                write("%s:%d\n" % (foundTag, found))
                write("%s:%d\n" % (hitTag, hitCount))

def main():
    """Parse the command line and run the conversion.

    The parsed arguments are handed to ProcessFile, whose constructor does
    all of the work.
    """
    usageString="""py2lcov: Translate Python coverage data to LCOV .info format.
Please also see https://coverage.readthedocs.io

Example:
   $ export PYCOV_DATA=path/to/pydata

     For 'coverage' versions 6.6.1 and higher (which support "--data-file"):
   $ coverage run --data-file=${PYCOV_DATA} --append --branch \\
       `which myPthonScript.py` args_to_my_python_script

     For older versions which don't support "--data-files":
        use COVERAGE_FILE environment variable to specify data file
   $ COVERAGE_FILE=${PYCOV_DATA} coverage run --append --branch \\
       `which myPthonScript.py` args_to_my_python_script

     # now use py2lcov to translate the XML to INFO file format -
     # also include version information in the generated coverage data.
   $ py2lcov -o pydata.info ${PYCOV_DATA}

     # and use genhtml to produce an HTML coverage report:
   $ genhtml -o html_report pydata.info ....

   Deprecated feature:
   For backward compatibility, py2lcov also supports translation to LCOV
   format from intermediate XML:

       # first translate from Python coerage data to XML:
     $ coverage xml --data-file=${PYCOV_DATA} -o pydata.xml |& tee pydata.log
       # or - if your Coverage.py module is too old to support '--data-file':
     $ COVERAGE_FILE=${PYCOV_DATA} coverage xml -o pydata.xml |& tee pydata.log

       # then translate XML to LCOV format:
     $ py2lcov -i pydata.xml -o pydata.info --version-script myCovScript

"""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usageString)

    parser.add_argument('-i', '--input', dest='input', default=None,
                        help="DEPRECATED: specify the input xml file from coverage.py")
    parser.add_argument('-o', '--output', dest='output', default='lcov.info',
                        help="specify the out LCOV .info file, default(lcov.info)")
    parser.add_argument('-t', '--test-name', '--testname', dest='testName', default='',
                        help="specify the test name for the TN: entry in LCOV .info file")
    parser.add_argument('-e', '--exclude', dest='excludePatterns', default='',
                        help="specify the exclude file patterns separated by ','")
    parser.add_argument('-v', '--verbose', dest='verbose', default=False, action='store_true',
                        help="print debug messages")
    parser.add_argument('--version-script', dest='version',
                        help="version extract callback")
    parser.add_argument('--checksum', dest='checksum', action='store_true',
                        default=False,
                        help="compute line checksum - see 'man lcov'")
    parser.add_argument("--no-functions", dest='deriveFunctions',
                        default=True, action='store_false',
                        help="do not derive function coverpoints")
    # dest must be 'tabWidth': that is the attribute name which ProcessFile
    # reads when it measures the indent of a line containing tabs
    parser.add_argument("--tabwidth", dest='tabWidth', default=8, type=int,
                        help='tabsize when computing indent')
    parser.add_argument('-k', "--keep-going", dest='keepGoing', default=False, action='store_true',
                        help="ignore errors")
    parser.add_argument('inputs', nargs='*',
                        help="list of python coverage data input files - expected to be XML or Python .dat format")

    try:
        args = parser.parse_args()
    except IOError as err:
        print(str(err))
        sys.exit(2)

    # the constructor performs the whole translation
    ProcessFile(args)


if __name__ == "__main__":
    # run the translation only when executed as a script, not when imported
    main()
