#!/usr/bin/python3
"""
Synopsis: sort sizes as being printed from du -h

Usage: {appname} [-hVvr][-l limit]
       -h, --help           this message
       -V, --version        print version and exit
       -v, --verbose        verbose mode (cumulative)
       -r, --reverse        reverse result
       -l, --limit #        limit results

Copyright:
(c)2019 by {author}

License:
{license}
"""
#
# vim:set et ts=8 sw=4:
#

# Module metadata: copied into gpar below, from where it ends up in the
# usage text ({author}, {license}) and in the --version output
__version__ = '0.1'
__author__ = 'Hans-Peter Jansen <hpj@urpla.net>'
__license__ = 'GNU GPL v2 - see http://www.gnu.org/licenses/gpl2.txt for details'


import io
import os
import re
import sys
import getopt
import signal
import textwrap
import functools


class gpar:
    """Global parameter class

    Plain namespace, accessed through the class itself: application
    identity (derived from sys.argv[0]), settings changed by the command
    line options, and the pattern used to parse human readable sizes.
    """
    # location and name of the running script
    appdir, appname = os.path.split(sys.argv[0])
    # a bare script name results in an empty directory part, which refers
    # to the current directory just like an explicit '.'
    if appdir in ('', '.'):
        appdir = os.getcwd()
    if appname.endswith('.py'):
        appname = appname[:-3]
    pid = os.getpid()
    version = __version__
    author = __author__
    license = __license__
    # settings, modified by the command line options
    loglevel = 0                # verbosity, raised by each -v
    limit = 0                   # max. number of result lines, 0: unlimited
    reverse = False             # True: smallest sizes first
    encoding = 'utf8'           # encoding of the input data
    si_prefixes = 'KMGTPEZY'   # this isn't SI, it's a JEDEC derivate
    # see https://en.wikipedia.org/wiki/Byte#Unit_symbol
    # matches a complete size string: integer part, optional fraction
    # (separated by point or comma) and an optional unit letter
    si_pattern = re.compile(r'''
        ^                   # anchor at start
        \s*                 # opt: spaces
        (?P<integer>\d+)    # man: integer
        [.,]?               # opt: point
        (?P<fraction>\d+)?  # opt: fraction
        \s*                 # opt: spaces
        (?P<si_unit>[%s])?  # opt: si unit
        \s*                 # opt: spaces
        $                   # anchor at end
    ''' % si_prefixes, re.VERBOSE)


def stdout(*msg):
    """Print msg to the current sys.stdout and flush immediately"""
    print(*msg, file = sys.stdout, flush = True)


def stderr(*msg):
    """Print msg to the current sys.stderr and flush immediately"""
    print(*msg, file = sys.stderr, flush = True)


def vout(lvl, *msg):
    """Verbose output: print msg to stderr, unless lvl exceeds gpar.loglevel"""
    if gpar.loglevel < lvl:
        return
    stderr(*msg)


class trace:
    """Trace decorator class

    Wraps a function in a way, that every call is reported through vout
    at the given level, together with its (abbreviated) arguments and
    the result.
    """
    def __init__(self, level = 3, maxlen = 20):
        self.level = level      # verbosity level of the trace messages
        self.maxlen = maxlen    # max. length of an argument representation

    def abbrev(self, arg):
        """Return repr(arg), cut down to maxlen; falsy values pass unchanged"""
        if not arg:
            return arg
        text = repr(arg)
        if len(text) <= self.maxlen:
            return text
        return text[:self.maxlen] + "..'"

    def argstr(self, *args, **kwargs):
        """Render the call arguments; falsy positional arguments are omitted"""
        parts = [self.abbrev(item) for item in args if item]
        parts.extend('{} = {}'.format(key, self.abbrev(val))
                     for key, val in kwargs.items())
        return ', '.join(parts)


    def __call__(self, func):
        @functools.wraps(func)
        def trace_and_call(*args, **kwargs):
            # call first, since the result is part of the trace message
            result = func(*args, **kwargs)
            call = self.argstr(*args, **kwargs)
            vout(self.level, '{}({}): {}'.format(func.__name__, call, result))
            return result
        return trace_and_call


def exit(ret = 0, msg = None, usage = False):
    """Leave the process with exit status ret

    A given msg is printed to stderr, prefixed with the application
    name; with usage set, the module docstring follows, its placeholders
    being filled from gpar.
    """
    if msg:
        note = '{}: {}'.format(gpar.appname, msg)
        stderr(note)
    if usage:
        helptext = __doc__.format(**vars(gpar))
        stderr(helptext)
    raise SystemExit(ret)


def rstrip(line, lst = ' \t\r\n'):
    """Strip whitespace and line breaks from line end

    line: the string to strip; a falsy value is returned unchanged
    lst:  the characters to remove (a string or an iterable of
          single characters)

    Returns line without any trailing characters contained in lst.
    """
    if not line:
        return line
    # str.rstrip does the job in a single pass, instead of copying the
    # string once per removed character
    return line.rstrip(''.join(lst))


@trace(2)
def parsesize(value):
    """Convert a human readable size string into a number

    value must match gpar.si_pattern: an integer, optionally followed by
    a fraction and one of the unit letters in gpar.si_prefixes.  Each
    unit letter scales by a further factor of 1024 (K: 1024, M: 1024**2,
    and so forth).

    Returns the scaled size as float, or 0 (after a message on stderr),
    if value does not match the pattern.
    """
    match = gpar.si_pattern.match(value)
    if not match:
        vout(0, 'invalid size: %s' % value)
        return 0

    vout(2, match.groupdict())
    number = int(match['integer'])
    decimals = match['fraction']
    unit = match['si_unit']
    if decimals is not None:
        # rejoin with a point, since the pattern accepts a comma as well
        number = float('%s.%s' % (number, decimals))
    factor = 1.0
    if unit is not None:
        if unit in gpar.si_prefixes:
            factor = 1024.0 ** (gpar.si_prefixes.index(unit) + 1)
        else:
            vout(0, 'unknown unit: %s' % unit)
    return number * factor


@trace()
def sortsizes(fd, desc):
    """Sort sizes

    Read lines of the form '<size><whitespace><rest>' from fd, sort them
    by the parsed size and print them to stdout, unchanged apart from
    stripped trailing whitespace.  Empty lines are skipped.

    fd:   file object to read the lines from
    desc: description of the input, used in verbose messages only

    The largest size comes first, unless gpar.reverse is set; a non-zero
    gpar.limit restricts the number of printed lines.
    """
    vout(1, 'sortsizes {}'.format(desc))
    outdata = []
    for line in fd.readlines():
        line = rstrip(line)
        if not line:
            continue
        # the capture group keeps the separator, in order to reproduce
        # the line verbatim later on
        parts = re.split(r'(\s+)', line, maxsplit=1)
        if len(parts) == 3:
            size, sep, app = parts
        else:
            # no whitespace at all: the whole line is the size
            size, sep, app = line, '', ''
        outdata.append((parsesize(size), size, sep, app))

    # invert reverse logic, we prefer the largest first by default
    outdata.sort(reverse = not gpar.reverse)

    # limit result set
    limit = gpar.limit or len(outdata)

    # combine lines
    for _, size, sep, app in outdata[:limit]:
        stdout(''.join((size, sep, app)))


def process(args):
    """Process files or stdin

    args: list of file names; stdin is read, if it is empty

    Every input is sorted and printed on its own by sortsizes().

    Returns 0 on success, 1 if at least one file could not be opened.
    """
    ret = 0
    vout(1, 'started with pid {pid} in {appdir}'.format(**gpar.__dict__))
    if not args:
        # reopen stdin by its descriptor in order to apply our encoding;
        # closefd=False leaves the descriptor itself open
        stdin = getattr(sys.stdin, 'buffer', sys.stdin)
        with io.open(stdin.fileno(), encoding=gpar.encoding,
                     closefd=False) as fd:
            sortsizes(fd, 'stdin')
    else:
        for arg in args:
            try:
                fd = open(arg, encoding=gpar.encoding)
            except OSError as err:
                # report, and carry on with the remaining files
                stderr('{}: {}'.format(gpar.appname, err))
                ret = 1
                continue
            with fd:
                sortsizes(fd, arg)
    return ret


def main(argv = None):
    """Command line interface and console script entry point.

    argv: list of command line arguments, defaults to sys.argv[1:]

    Parses the options into gpar and hands the remaining arguments over
    to process().

    Returns the result of process(), or 3 if interrupted from the
    keyboard.  Invalid options terminate through exit() with status 1,
    an invalid limit value with status 2.
    """
    if argv is None:
        argv = sys.argv[1:]

    try:
        # the trailing '=' of 'limit=' tells getopt, that --limit requires
        # an argument, similar to 'l:' for the short option
        optlist, args = getopt.getopt(argv, 'hVvrl:',
            ('help', 'version', 'verbose', 'reverse', 'limit=')
        )
    except getopt.error as msg:
        exit(1, msg, True)

    for opt, par in optlist:
        if opt in ('-h', '--help'):
            exit(usage = True)
        elif opt in ('-V', '--version'):
            exit(msg = 'version {}'.format(gpar.version))
        elif opt in ('-v', '--verbose'):
            gpar.loglevel += 1
        elif opt in ('-r', '--reverse'):
            gpar.reverse = True
        elif opt in ('-l', '--limit'):
            try:
                gpar.limit = int(par)
            except ValueError:
                exit(2, 'invalid parameter for limit: \'%s\'' % par)

    # restore the default SIGPIPE action, avoiding broken pipe errors
    # (SIGPIPE is not available on all platforms)
    if hasattr(signal, 'SIGPIPE'):
        signal.signal(signal.SIGPIPE, signal.SIG_DFL)

    try:
        return process(args)
    except KeyboardInterrupt:
        return 3    # interrupted from the keyboard


# run as a script: the return value of main() becomes the exit status
if __name__ == '__main__':
    sys.exit(main())

