#!/usr/bin/python2
# -*- coding: UTF-8 -*-

import sys
import os
import stat
from optparse import Option, OptionParser
import tempfile

try:
    from spacewalk.common import checksum
    from spacewalk.common.rhnLog import initLOG, log_debug
    from spacewalk.common.rhnConfig import CFG, initCFG
    from spacewalk.server import rhnSQL
    from spacewalk.server.rhnPackage import unlink_package_file
except:
    _LIBPATH = "/usr/share/rhn"
    # add to the path if need be
    if _LIBPATH not in sys.path:
        sys.path.append(_LIBPATH)
    from common import CFG, initCFG, initLOG, log_debug
    from server import rhnSQL
    from server.rhnPackage import unlink_package_file

# Log file handed to initLOG() when the script is run directly.
LOG_FILE = '/var/log/rhn/spacewalk-data-fsck.log'

# Summary line templates, keyed by check name.  Each template is formatted
# with the counter stored under the same key in `report`.
report_msg = {
    'file': "%5d files scanned",
    'exists': "ERROR: %5d files missing on disk",
    'dbexists': "ERROR: %5d files missing in database",
    'srpmexists': "ERROR: %5d SRPMs without matching RPM in db",
    'restore': "ERROR: %5d file paths could not be restored",
    'size': "ERROR: %5d file size mismatch(es)",
    'checksum': "ERROR: %5d file checksum mismatch(es)",
    'nevrao': "ERROR: %5d file NEVRAO mismatch(es)",
}
# Per-check counters.  check_db_vs_disk() and check_disk_vs_db() reset one
# entry per report_msg key to zero before they start counting.
report = {}


def is_sha256_capable():
    """Tell whether the installed spacewalk-schema is newer than 0.8.1-1.

    The RPM database is searched for a package named ``spacewalk-schema``
    and the first match is compared against version ``0.8.1``, release ``1``
    with ``rpm.labelCompare``.  package_query() uses the answer to choose
    between the ``rhnChecksumView`` join and the ``p.md5sum`` column.

    Returns:
        True if spacewalk-schema is installed and compares strictly greater
        than 0.8.1-1; False if it is equal, older, or not installed.
    """
    # Function-level import: the rpm bindings are only needed when this
    # check is actually performed.
    import rpm

    matches = rpm.TransactionSet().dbMatch('name', 'spacewalk-schema')
    try:
        # next() works on both Python 2 and Python 3 iterators, unlike the
        # Python-2-only .next() method.
        header = next(matches)
    except StopIteration:
        # spacewalk-schema is not installed
        return False

    # The same name is placed first on both sides, so only the version and
    # release elements can make the two labels differ.
    comparison = rpm.labelCompare(
        [header['name'], header['version'], header['release']],
        [header['name'], '0.8.1', '1'])
    return comparison > 0


def db_init():
    """Initialise the 'server' configuration, then the rhnSQL database layer."""
    initCFG('server')
    rhnSQL.initDB()


def src_package_query():
    """Return the SQL that selects package ids whose path matches a pattern.

    The statement takes one bind variable, ``:filename``, which is used as
    the right-hand side of a ``LIKE`` on ``rhnPackage.path``.
    """
    return """select p.id
                 from rhnPackage p
                where p.path like :filename"""

def package_query(options, bind_path=False):
    """Build the SELECT used to read package rows for the enabled checks.

    Args:
        options: parsed command-line options; only ``checksum`` and
            ``nevrao`` are read here.
        bind_path: when true, restrict the query to a single package with
            ``where p.path = :path``.

    Returns:
        The SQL text.  The id, org id, size and path columns are always
        selected; checksum columns are added when either the checksum or
        the NEVRAO check is enabled, and name/EVR/arch columns (with their
        joins) when the NEVRAO check is enabled.
    """
    column_parts = ["p.id, p.org_id, p.package_size, p.path"]
    table_parts = ["rhnPackage p"]

    if options.checksum or options.nevrao:
        if is_sha256_capable():
            column_parts.append(", c.checksum, c.checksum_type")
            table_parts.append(" join rhnChecksumView c on p.checksum_id = c.id")
        else:
            # schema without rhnChecksumView: read the md5sum column instead
            column_parts.append(", p.md5sum as checksum, 'md5sum' as checksum_type")

    if options.nevrao:
        column_parts.append(", n.name, evr.epoch, evr.version, evr.release, a.label as arch")
        table_parts.extend([
            " join rhnPackageName n on p.name_id = n.id",
            " join rhnPackageEVR evr on p.evr_id = evr.id",
            " join rhnPackageArch a on p.package_arch_id = a.id",
        ])

    template = """select %s
                 from %s
                     """
    if bind_path:
        template = template + "where p.path = :path"
    return template % ("".join(column_parts), "".join(table_parts))


def check_db_vs_disk(options):
    """Check every package path stored in the database against the disk.

    Each row that has a non-empty path is resolved under CFG.MOUNT_POINT.
    A missing file is counted and handed to not_on_disk().  For a file that
    exists, either path restoration is attempted (``options.restore``) or
    the size, NEVRAO and checksum checks enabled in ``options`` are run.
    Results are accumulated in the module-level ``report`` counters and a
    summary of all positive counters is logged at level 1.

    Returns:
        1 if any counter other than 'file' ended up positive, otherwise 0.
    """
    # start from a clean set of counters
    for name in report_msg.keys():
        report[name] = 0

    cursor = rhnSQL.prepare(package_query(options))
    mount_point = CFG.MOUNT_POINT
    cursor.execute()

    row = cursor.fetchone_dict()
    while row:
        if row['path']:
            abs_path = os.path.join(mount_point, row['path'])
            log(3, abs_path)
            report['file'] += 1

            if check_disk_exists(abs_path):
                # check_disk_exists() is true when the file is MISSING
                report['exists'] += 1
                not_on_disk(row, abs_path, options)
            elif options.restore:
                report['restore'] += check_disk_nevrao(abs_path, row.copy(), True)
            else:
                if options.size:
                    report['size'] += check_disk_size(abs_path, row['package_size'])
                if options.nevrao:
                    # a copy is passed because check_disk_nevrao() modifies it
                    report['nevrao'] += check_disk_nevrao(abs_path, row.copy())
                if options.checksum:
                    report['checksum'] += check_disk_checksum(
                        abs_path, row['checksum_type'], row['checksum'])
        row = cursor.fetchone_dict()
    cursor.close()

    error_found = 0
    for name in ['file', 'exists', 'restore', 'size', 'nevrao', 'checksum']:
        count = report[name]
        if count > 0:
            log(1, report_msg[name] % count)
            # the number of scanned files is informational, not an error
            if name != 'file':
                error_found = 1
    return error_found


def check_disk_exists(abs_path):
    """Return True when ``abs_path`` is NOT a regular file.

    Despite the name, a true result means the file is missing (or the path
    is something other than a regular file).
    """
    if os.path.isfile(abs_path):
        return False
    return True


def not_on_disk(row, abs_path, options):
    """Handle a package that the database knows about but the disk lacks.

    Without ``options.remove`` the missing file is only logged.  With it,
    the package's ``path`` column is set to NULL and the change committed.

    Args:
        row: database row of the package; ``id`` and ``path`` are read.
        abs_path: absolute path at which the file was expected.
        options: parsed command-line options; ``remove`` is read.
    """
    if not options.remove:
        log(0, "File missing: %s" % abs_path)
        return

    log(0, "Removed package path from db: %s" % row['path'])
    rhnSQL.execute('update rhnPackage set path = NULL where id = :id', id=row['id'])
    rhnSQL.commit()


def check_disk_size(abs_path, size):
    """Compare the size of ``abs_path`` on disk with the expected ``size``.

    Returns:
        0 when the sizes are equal; 1 (after logging both values) when
        they differ.
    """
    size_on_disk = os.stat(abs_path).st_size
    if size_on_disk != size:
        log(0, "File size mismatch: %s (%s vs. %s)" % (abs_path, size, size_on_disk))
        return 1
    return 0


def check_disk_nevrao(abs_path, row, restore=None):
    """Compare the components of a stored package path with its metadata.

    ``row['path']`` must consist of exactly eight '/'-separated components.
    The first one is ignored; the remaining seven are compared, in order,
    with org id, checksum prefix, name, EVR, arch, checksum and basename
    derived from ``row``.

    Args:
        abs_path: absolute path of the file, used in log messages only.
        row: package row with the NEVRAO and checksum columns.  It is
            modified in place ('org_id' becomes a string, and
            'checksum_prefix', 'evr' and 'basename' are added), which is
            why callers hand in a copy.
        restore: when true, the first mismatch triggers
            restore_file_path() and its result is returned.

    Returns:
        0 if every component matches; 1 if the path has the wrong number
        of components or any component differs; with ``restore`` set,
        whatever restore_file_path() returns for the first mismatch.
    """
    compared_keys = ('org_id', 'checksum_prefix', 'name', 'evr', 'arch',
                     'checksum', 'basename')

    components = row['path'].split('/')
    if len(components) != len(compared_keys) + 1:
        log(0, "File path %s mismatch structure" % row['path'])
        return 1
    # drop the leading component, it takes no part in the comparison
    on_disk = dict(zip(compared_keys, components[1:]))

    # derive the values the path components are expected to have
    if row['org_id']:
        row['org_id'] = str(row['org_id'])
    else:
        row['org_id'] = 'NULL'
    row['checksum_prefix'] = row['checksum'][:3]
    version_release = row['version'] + '-' + row['release']
    if row['epoch']:
        row['evr'] = row['epoch'] + ':' + version_release
    else:
        row['evr'] = '' + version_release
    # the file extension is taken from the last three characters of the path
    row['basename'] = "%s-%s-%s.%s.%s" % (
        row['name'], row['version'], row['release'],
        row['arch'], row['path'][-3:])

    mismatch = 0
    for key in compared_keys:
        if on_disk[key] != row[key]:
            log(0, "File path mismatch: %s (%s: %s vs. %s)" % (abs_path, key, row[key],
                                                               on_disk[key]))
            if restore:
                log(2, "Beginning file path restoration: %s" % abs_path)
                return restore_file_path(on_disk, row)
            mismatch = 1
    return mismatch


def restore_file_path(file, row):
    """Move a package file to the path implied by its database metadata.

    The target path is assembled from ``row`` as
    ``redhat/<org_id>/<checksum_prefix>/<name>/<evr>/<arch>/<checksum>/<basename>``
    below CFG.MOUNT_POINT.  The file is hard-linked to the target, the
    ``path`` column of the package is updated and committed, and finally
    the old file and its now-empty parent directories are removed.

    Args:
        file: path components parsed from the current path.  Not used by
            this function; kept so existing callers keep working.
        row: package row as prepared by check_disk_nevrao(); 'id', 'path',
            'org_id', 'checksum_prefix', 'name', 'evr', 'arch', 'checksum',
            'checksum_type' and 'basename' are read.

    Returns:
        0 when the path was restored, 1 when restoration failed.  Every
        failure path returns 1 so callers can add the result to a counter.
    """
    # Function-level import so this block does not depend on the file's
    # import section.
    import errno

    prefix = CFG.MOUNT_POINT + '/'
    old_abs_path = prefix + row['path']
    new_rel_path = '/'.join(['redhat', row['org_id'], row['checksum_prefix'],
                             row['name'], row['evr'], row['arch'],
                             row['checksum'], row['basename']])
    new_path = prefix + new_rel_path
    new_dir = new_path.rsplit('/', 1)[0]

    # A file already sitting at the target whose checksum does not match the
    # database is moved aside into a fresh temporary directory.
    if (os.path.isfile(new_path) and
            check_disk_checksum(new_path, row['checksum_type'], row['checksum'])):
        backup_path = tempfile.mkdtemp() + new_path
        log(0, "Target file path exists, moving to: %s" % backup_path)
        os.renames(new_path, backup_path)

    # 1st, make the directories needed.  Remember whether this call created
    # them, because only then may a rollback remove them again.
    created_dirs = True
    try:
        os.makedirs(new_dir)
    except OSError as e:
        if e.errno != errno.EEXIST:  # something broke
            log(0, "File path restoration failed: %s" % row['path'])
            return 1
        created_dirs = False  # directory already existed, do not clean it up

    # 2nd, create the hard link.  This is kept separate from makedirs so that
    # EEXIST from the directory creation is not confused with a link error.
    try:
        if not os.path.isfile(new_path):
            os.link(old_abs_path, new_path)
    except OSError:
        log(0, "File path restoration failed, rolling back: %s" % row['path'])
        if created_dirs:
            try:
                os.removedirs(new_dir)
            except OSError as e:
                # a non-empty directory simply stops the pruning
                if e.errno != errno.ENOTEMPTY:
                    log(0, "Could not rollback file path restoration: %s" % row['path'])
        # Report the failure as 1; returning None here would break the
        # caller's "report['restore'] += ..." accounting.
        return 1

    # 3rd, record the new path.  Bind variables are used so that quotes in a
    # path component cannot break or alter the statement.
    h = rhnSQL.prepare("update rhnPackage set path = :path where id = :id")
    h.execute(path=new_rel_path, id=row['id'])
    rhnSQL.commit()

    # 4th, remove the old file and prune its empty parent directories.  The
    # database already points at the new path, so problems here are only
    # logged.
    try:
        os.remove(old_abs_path)
        os.removedirs(prefix + row['path'].rsplit('/', 1)[0])
    except OSError as e:
        if e.errno == errno.ENOENT:
            log(2, "File %s%s was removed by someone else but transaction has been successfully completed" %
                (prefix, row['path']))
        elif e.errno != errno.ENOTEMPTY:
            log(4, "Unexpected error while cleaning up %s%s: %s" % (prefix, row['path'], e))
    log(0, "File %s%s successfully restored, new file path: %s" % (prefix, row['path'], new_path))
    return 0


def check_disk_checksum(abs_path, checksum_type, db_checksum):
    """Compare the checksum of a file on disk with the stored checksum.

    Reads the module-level ``options`` (set in the ``__main__`` section):
    when ``options.remove_mismatch`` is set, a mismatching file is removed
    through remove_mismatch().

    Args:
        abs_path: absolute path of the file to hash.
        checksum_type: checksum algorithm name handed to
            ``checksum.getHashlibInstance``.
        db_checksum: expected hex digest.

    Returns:
        0 if the computed digest equals ``db_checksum``, otherwise 1.  A
        file whose digest cannot be computed counts as ``None`` and is
        therefore a mismatch unless ``db_checksum`` is ``None`` as well.
    """
    try:
        # open() instead of the Python-2-only file() builtin; the with block
        # closes the handle even when hashing fails.
        with open(abs_path, 'rb') as fp:
            hasher = checksum.getHashlibInstance(checksum_type, False)
            # Hash in chunks so large packages are not loaded into memory
            # in one piece.
            for chunk in iter(lambda: fp.read(1024 * 1024), b''):
                hasher.update(chunk)
        file_checksum = hasher.hexdigest()
    except Exception:
        # Best effort, as before: any failure to compute the digest is
        # reported as a mismatch below.  Unlike a bare except this lets
        # KeyboardInterrupt and SystemExit propagate, so an interrupted run
        # is not mistaken for a corrupt file.
        file_checksum = None

    if file_checksum == db_checksum:
        return 0

    log(0, "File checksum mismatch: %s (%s: %s vs. %s)" %
        (abs_path, checksum_type, db_checksum, file_checksum))
    # NOTE(review): a file whose digest could not be computed is removed here
    # too when --remove-mismatch is given -- confirm that this is intended.
    if options.remove_mismatch:
        remove_mismatch(abs_path, options)
    return 1


def check_disk_vs_db(disk_content=None, db_content=None):
    """Check every file below <MOUNT_POINT>/redhat against the database.

    Each file is looked up in rhnPackage by its path relative to
    CFG.MOUNT_POINT.  A file without a row is counted as missing in the
    database and handed to not_in_db(); for source RPMs this only happens
    when is_orphaned_srpm() finds no matching package.  For files that do
    have a row, the size, NEVRAO and checksum checks run only in --fs-only
    mode.

    The function reads the module-level ``options`` (set in the
    ``__main__`` section), not its parameters.

    Args:
        disk_content: not used by this function.
        db_content: not used by this function.

    Returns:
        1 if any counter other than 'file' ended up positive, otherwise 0.
    """
    for k in report_msg:
        report[k] = 0
    query = package_query(options, bind_path=True)
    h = rhnSQL.prepare(query)
    try:
        for root, _dirs, files in os.walk(os.path.join(CFG.MOUNT_POINT, 'redhat')):
            # relpath() yields the correct relative directory even when
            # MOUNT_POINT ends with a slash or is '/', where slicing off
            # len(MOUNT_POINT) + 1 characters would cut into the path.
            rel_root = os.path.relpath(root, CFG.MOUNT_POINT)
            for f in files:
                abs_path = os.path.join(root, f)
                rel_path = os.path.join(rel_root, f)
                log(3, abs_path)
                report['file'] += 1

                h.execute(path=rel_path)
                row = h.fetchone_dict()

                if not row:
                    # A source RPM without its own row is only an error when
                    # no package matching its file name exists in the db.
                    if (not is_srpm(f)) or is_orphaned_srpm(abs_path, f):
                        if is_srpm(f):
                            report['srpmexists'] += 1
                        else:
                            report['dbexists'] += 1
                        not_in_db(abs_path, options)
                    continue

                if options.size and options.fs_only:
                    report['size'] += check_disk_size(abs_path, row['package_size'])
                if options.nevrao and options.fs_only:
                    # a copy is passed because check_disk_nevrao() modifies it
                    report['nevrao'] += check_disk_nevrao(abs_path, row.copy())
                if options.checksum and options.fs_only:
                    report['checksum'] += check_disk_checksum(abs_path,
                                                              row['checksum_type'], row['checksum'])
    finally:
        # release the cursor even when one of the checks raises
        h.close()

    error_found = 0
    for i in ['file', 'dbexists', 'srpmexists', 'size', 'nevrao', 'checksum']:
        if report[i] > 0:
            log(1, report_msg[i] % report[i])
            # the number of scanned files is informational, not an error
            if i != 'file':
                error_found = 1
    return error_found

def is_srpm(file):
    """Return True if the name contains the substring "src.rpm"."""
    return "src.rpm" in file

def is_orphaned_srpm(path, file):
    """Tell whether a source RPM has no matching package in the database.

    The file name is turned into a LIKE pattern by replacing every 'src'
    with '%' and prefixing '%/'; the pattern is matched against
    rhnPackage.path.

    Args:
        path: full path of the file, used in the log message only.
        file: bare file name.

    Returns:
        True (after logging) if ``file`` is a source RPM and no package
        path matches the pattern; False otherwise, including when ``file``
        is not a source RPM.
    """
    if not is_srpm(file):
        return False

    cursor = rhnSQL.prepare(src_package_query())
    like_pattern = "%/" + file.replace('src', '%')
    cursor.execute(filename=like_pattern)
    if cursor.fetchone_dict():
        return False

    log(0, "SRPM without matching RPM in db: %s" % path)
    return True

def not_in_db(path, options):
    """Handle a file that exists on disk but has no row in the database.

    With ``options.remove`` the removal is logged and the file is deleted
    through unlink_package_file().  Without it, the file is only logged,
    and only if it is not a source RPM.
    """
    if options.remove:
        if is_srpm(path):
            template = "Removed SRPM without matching RPM in db: %s"
        else:
            template = "Removed file missing in db: %s"
        log(0, template % path)
        unlink_package_file(path)
    elif not is_srpm(path):
        log(0, "File missing in db: %s" % path)

def remove_mismatch(path, options):
    """Log the removal of a checksum-mismatched file, then delete it.

    ``options`` is accepted but not read by this function.
    """
    message = "Removed file because checksum does not match: %s" % path
    log(0, message)
    unlink_package_file(path)

def log(level, *args):
    """Send a message to the log file and, if verbose enough, to stdout.

    The arguments are always passed to log_debug().  They are additionally
    printed, joined with ", ", when the verbosity from the module-level
    ``options`` (an unset value counts as 0) is at least ``level``.
    """
    log_debug(level, *args)
    verbosity = options.verbose or 0
    if verbosity >= level:
        print(', '.join([str(item) for item in args]))

if __name__ == '__main__':
    # Command-line entry point: parse the options, initialise logging and the
    # database, then run the database-vs-disk and/or disk-vs-database pass.

    options_table = [
        Option("-v", "--verbose",       action="count",
               help="Increase verbosity"),
        Option("-S", "--no-size",       action="store_false", dest="size", default=True,
               help="Don't check package size"),
        Option("-C", "--no-checksum",   action="store_false", dest="checksum", default=True,
               help="Don't check package checksum"),
        Option("-O", "--no-nevrao",     action="store_false", dest="nevrao", default=True,
               help="Don't check package name, epoch, version, release, arch, org"),
        Option("-d", "--db-only",       action="store_true",
               help="Check only if packages from database are present on filesystem"),
        Option("-f", "--fs-only",       action="store_true",
               help="Check only if packages from filesystem are in the database"),
        Option("-r", "--remove",        action="store_true",
               help="Automatically remove packages from filesystem not present in database"),
        Option("-R", "--remove-mismatch", action="store_true", dest="remove_mismatch", default=False,
               help="Automatically remove packages from filesystem that does not match the checksum stored in database. (not valid with --db-only)"),
        Option("-F", "--fix-file-path", action="store_true", dest="restore", default=False,
               help="Restores file paths, try this when you have NEVRAO mismatches. Do not run this command with another commands"),
    ]
    parser = OptionParser(option_list=options_table)
    # `options` has to be a module-level name: log(), check_disk_checksum()
    # and check_disk_vs_db() read it as a global.
    (options, args) = parser.parse_args()

    initLOG(LOG_FILE, options.verbose or 0)

    db_init()

    exit_value = 0
    if options.remove_mismatch:
        if options.db_only or not options.checksum:
            log(0, "Syntax error: The option --remove-mismatch cannot be used with --db-only or --no-checksum")
            sys.exit(1)
    # Path restoration goes through check_disk_nevrao(), which reads the
    # name/epoch/version/release/arch columns; package_query() only selects
    # those when the NEVRAO check is enabled.  Refuse the combination up
    # front instead of failing with a KeyError in the middle of the run.
    if options.restore and not options.nevrao:
        log(0, "Syntax error: The option --fix-file-path cannot be used with --no-nevrao")
        sys.exit(1)
    if not options.fs_only:
        log(1, "Checking if packages from database are present on filesystem")
        exit_value += check_db_vs_disk(options)
    if not options.db_only:
        log(1, "Checking if packages from filesystem are present in database")
        exit_value += check_disk_vs_db(options)

    # Each pass returns 0 or 1, so the exit status is 0, 1 or 2.
    sys.exit(exit_value)
