#!/bin/sh
# Copyright 2015 Red Hat, Inc.
#
# Description:  Manages compute daemons
#
# Authors: Andrew Beekhof
#
# Support:      openstack@lists.openstack.org
# License:      Apache Software License (ASL) 2.0
#


#######################################################################
# Initialization:

###
# Locate and load the OCF shell function library.  OCF_FUNCTIONS_DIR is
# only defaulted when it is not already set in the environment.  The
# expansions are quoted so that a path containing whitespace or glob
# characters is used verbatim.
: "${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}"
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"
###

# The requested action defaults to the first command line argument unless
# __OCF_ACTION is already set.
: "${__OCF_ACTION=$1}"

#######################################################################

# Print the OCF resource agent metadata (XML) for NovaCompute on stdout.
# The document is static, so the here-document delimiter is quoted and the
# text is emitted literally.
meta_data() {
    cat <<'END'
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="NovaCompute" version="1.0">
<version>1.0</version>

<longdesc lang="en">
OpenStack Nova Compute Server.
</longdesc>
<shortdesc lang="en">OpenStack Nova Compute Server</shortdesc>

<parameters>

<parameter name="auth_url" unique="0" required="1">
<longdesc lang="en">
Authorization URL for connecting to keystone in admin context
</longdesc>
<shortdesc lang="en">Authorization URL</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="username" unique="0" required="1">
<longdesc lang="en">
Username for connecting to keystone in admin context
</longdesc>
<shortdesc lang="en">Username</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="password" unique="0" required="1">
<longdesc lang="en">
Password for connecting to keystone in admin context
</longdesc>
<shortdesc lang="en">Password</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="tenant_name" unique="0" required="1">
<longdesc lang="en">
Tenant name for connecting to keystone in admin context.
Note that with Keystone V3 tenant names are only unique within a domain.
</longdesc>
<shortdesc lang="en">Tenant name</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="domain" unique="0" required="0">
<longdesc lang="en">
DNS domain in which hosts live, useful when the cluster uses short names and nova uses FQDN
</longdesc>
<shortdesc lang="en">DNS domain</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="endpoint_type" unique="0" required="0">
<longdesc lang="en">
Nova API location (internal, public or admin URL)
</longdesc>
<shortdesc lang="en">Nova API location (internal, public or admin URL)</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="region_name" unique="0" required="0">
<longdesc lang="en">
Region name for connecting to nova.
</longdesc>
<shortdesc lang="en">Region name</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="insecure" unique="0" required="0">
<longdesc lang="en">
Explicitly allow client to perform "insecure" TLS (https) requests.
The server's certificate will not be verified against any certificate authorities.
This option should be used with caution.
</longdesc>
<shortdesc lang="en">Allow insecure TLS requests</shortdesc>
<content type="boolean" default="0" />
</parameter>

<parameter name="no_shared_storage" unique="0" required="0">
<longdesc lang="en">
Indicate that nova storage for instances is not shared across compute
nodes. This must match the reality of how nova storage is configured!
Otherwise VMs could end up in error state upon evacuation. When
storage is non-shared, instances on dead hypervisors will be rebuilt
from their original image or volume, so anything on ephemeral storage
will be lost.
</longdesc>
<shortdesc lang="en">Disable shared storage recovery for instances</shortdesc>
<content type="boolean" default="0" />
</parameter>

<parameter name="evacuation_delay" unique="0" required="0">
<longdesc lang="en">
How long to wait for nova to finish evacuating instances elsewhere
before starting nova-compute.  Only used when the agent detects
evacuations might be in progress.

You may need to increase the start timeout when increasing this value.
</longdesc>
<shortdesc lang="en">Delay to allow evacuations time to complete</shortdesc>
<content type="integer" default="120" />
</parameter>

</parameters>

<actions>
<action name="start"        timeout="600" />
<action name="stop"         timeout="300" />
<action name="monitor"      timeout="20" interval="10" depth="0"/>
<action name="validate-all" timeout="20" />
<action name="meta-data"    timeout="5" />
</actions>
</resource-agent>
END
}

#######################################################################

# SIGTERM handling: the agent does not exit on TERM; it only logs the
# signal and carries on, to test that lrmd makes sure that we do exit.
sigterm_handler() {
    # The function's status is that of ocf_log, its last command.
    ocf_log info "They use TERM to bring us down. No such luck."
}
trap sigterm_handler TERM

# Print a short usage summary for this agent on stdout.
nova_usage() {
    printf '%s\n' \
        "usage: $0 {start|stop|monitor|validate-all|meta-data}" \
        "" \
        "Expects to have a fully populated OCF RA-compliant environment set."
}

# Print the PID (first column of "ps axf") of every process whose ps line
# matches "python.*nova-compute", one per line.  Lines containing "grep"
# are dropped so the filter does not report itself.
nova_pid() {
    # The pattern is quoted: left bare, the shell would treat it as a glob
    # and replace it with any matching file name in the current directory.
    ps axf | grep 'python.*nova-compute' | grep -v grep | awk '{print $1}'
}

# Print the current value of the "evacuate" node attribute for NOVA_HOST
# as reported by attrd_updater; prints nothing when no value is reported.
nova_evacuate_state() {
    attrd_updater -p -n evacuate -N "${NOVA_HOST}" | \
        sed -e 's/.*value=//' | tr -d '"'
}

# Start nova-compute unless nova_monitor already reports it as running.
#
# Before starting, the "evacuate" node attribute is consulted:
#   empty      - no fencing to recover from, start right away
#   "no"       - the host has been evacuated (possibly only a moment ago)
#   otherwise  - evacuations are pending; poll every 5s until the value
#                becomes "no" or empty
# In the last two cases the agent pauses OCF_RESKEY_evacuation_delay
# seconds, runs "fence_compute ... -o on" for the host and deletes the
# attribute.
#
# Globals:  NOVA_HOST, fence_options (read; set by nova_validate),
#           OCF_RESKEY_evacuation_delay (read)
# Returns:  OCF_SUCCESS once nova_monitor reports success.  There is no
#           internal timeout in the final wait loop.
nova_start() {
    local state

    nova_monitor
    if [ "$?" = "$OCF_SUCCESS" ]; then
        return $OCF_SUCCESS
    fi

    state=$(nova_evacuate_state)
    if [ -n "$state" ]; then
        if [ "$state" != no ]; then
            ocf_log info "Waiting for pending evacuations from ${NOVA_HOST}"
            # Sleep first, then re-query: the value was read a moment ago
            # and there is no point in sleeping once it has flipped.
            while [ -n "$state" ] && [ "$state" != no ]; do
                sleep 5
                state=$(nova_evacuate_state)
            done
        fi

        ocf_log info "Pausing to give evacuations from ${NOVA_HOST} time to complete"
        sleep "${OCF_RESKEY_evacuation_delay}"

        # fence_options is a flat option string assembled by nova_validate
        # and is deliberately left unquoted so it splits into words.
        fence_compute ${fence_options} -o on -n "${NOVA_HOST}"
        attrd_updater -p -n evacuate -N "${NOVA_HOST}" -D
    fi

    # do not keep pacemaker 0026 umask; this breaks things as nova-compute
    # needs to be able to read files from qemu/kvm that it will create
    umask 0022
    # $RUNDIR can be tmpfs, thus we have to create/own it here
    # ("nova:" = user nova plus its login group; the "nova." spelling is
    # a deprecated form of the same thing)
    mkdir -m 0700 -p /var/run/nova && chown nova: /var/run/nova

    export LIBGUESTFS_ATTACH_METHOD=appliance
    su nova -s /bin/sh -c "/sbin/startproc -q -s /usr/bin/nova-compute"

    ocf_log info "Waiting for nova to start"
    while :; do
        nova_monitor
        if [ "$?" = "$OCF_SUCCESS" ]; then
            break
        fi
        # Pause between polls instead of spinning on nova_monitor.
        sleep 1
    done

    return $OCF_SUCCESS
}

# Stop nova-compute: ask killproc to terminate it, then poll checkproc once
# per second until it no longer reports the daemon.  Always returns
# OCF_SUCCESS; there is no internal timeout.
# (An earlier variant based on nova_pid and "kill -TERM" is no longer used.)
nova_stop() {
    /sbin/killproc /usr/bin/nova-compute

    until ! /sbin/checkproc /usr/bin/nova-compute; do
        sleep 1
    done

    return $OCF_SUCCESS
}

# Report whether nova-compute is running, judged solely by checkproc.
# Returns OCF_SUCCESS when checkproc succeeds, OCF_NOT_RUNNING otherwise.
#
# TEMPORARY: the additional "fence_compute ... -o status" check (which
# required "Status: ON" and otherwise returned OCF_ERR_GENERIC) is
# disabled to avoid noise on first startup, when nova-compute is not fast
# enough to populate the db and fence_compute checks that the node exists
# and is enabled.
nova_monitor() {
    if ! /sbin/checkproc /usr/bin/nova-compute; then
        return $OCF_NOT_RUNNING
    fi

    return $OCF_SUCCESS
}

# Handler for the "notify" action: nothing to do, always reports success.
nova_notify() {
    return ${OCF_SUCCESS}
}

# Exit with OCF_ERR_CONFIGURED unless a mandatory resource parameter is set.
#   $1 - parameter name, used in the exit reason
#   $2 - parameter value
nova_require_param() {
    if [ -z "$2" ]; then
        ocf_exit_reason "$1 not configured"
        exit $OCF_ERR_CONFIGURED
    fi
}

# Validate the environment and resource parameters, and derive the globals
# the other actions rely on.
#
# Globals written:
#   fence_options - option string later passed to fence_compute
#   NOVA_HOST     - "host" from [DEFAULT] in /etc/nova/nova.conf or
#                   /etc/nova/nova.conf.d/* (the last file providing a
#                   value wins); otherwise derived from "uname -n"
#   rc            - OCF_SUCCESS, or OCF_ERR_CONFIGURED on a host mismatch
#
# Exits the agent with OCF_ERR_CONFIGURED when nova.conf is missing, a
# required parameter is empty, endpoint_type is not one of the accepted
# values, or a configured host name does not match this node.  Returns
# OCF_SUCCESS otherwise.
#
# Every ocf_exit_reason call passes exactly one argument: with several
# arguments the first one is interpreted as a printf format string, which
# would drop the rest of a plain-text message.
nova_validate() {
    local conf_file
    local parsed_value
    local node_name
    local expected_host

    rc=$OCF_SUCCESS
    fence_options=""

    check_binary crudini
    check_binary nova-compute

    if [ ! -f /etc/nova/nova.conf ]; then
        ocf_exit_reason "/etc/nova/nova.conf not found"
        exit $OCF_ERR_CONFIGURED
    fi

    # NOTE(review): the values below are concatenated into one string that
    # is later word-split, so values containing whitespace will not survive,
    # and the password ends up on the fence_compute command line.
    nova_require_param auth_url "${OCF_RESKEY_auth_url}"
    fence_options="${fence_options} -k ${OCF_RESKEY_auth_url}"

    nova_require_param username "${OCF_RESKEY_username}"
    fence_options="${fence_options} -l ${OCF_RESKEY_username}"

    nova_require_param password "${OCF_RESKEY_password}"
    fence_options="${fence_options} -p ${OCF_RESKEY_password}"

    nova_require_param tenant_name "${OCF_RESKEY_tenant_name}"
    fence_options="${fence_options} -t ${OCF_RESKEY_tenant_name}"

    if [ -n "${OCF_RESKEY_domain}" ]; then
        fence_options="${fence_options} -d ${OCF_RESKEY_domain}"
    fi

    if [ -n "${OCF_RESKEY_region_name}" ]; then
        fence_options="${fence_options} --region-name ${OCF_RESKEY_region_name}"
    fi

    if [ -n "${OCF_RESKEY_insecure}" ] && ocf_is_true "${OCF_RESKEY_insecure}"; then
        fence_options="${fence_options} --insecure"
    fi

    if [ -n "${OCF_RESKEY_no_shared_storage}" ] && ocf_is_true "${OCF_RESKEY_no_shared_storage}"; then
        fence_options="${fence_options} --no-shared-storage"
    fi

    if [ -n "${OCF_RESKEY_endpoint_type}" ]; then
        case "${OCF_RESKEY_endpoint_type}" in
            adminURL|publicURL|internalURL)
                ;;
            *)
                ocf_exit_reason "endpoint_type ${OCF_RESKEY_endpoint_type} not valid. Use adminURL or publicURL or internalURL"
                exit $OCF_ERR_CONFIGURED
                ;;
        esac
        fence_options="${fence_options} -e ${OCF_RESKEY_endpoint_type}"
    fi

    # we take a chance here and hope that host is either not configured
    # or configured in nova.conf
    NOVA_HOST=
    for conf_file in /etc/nova/nova.conf /etc/nova/nova.conf.d/*; do
        # Also skips the literal pattern left behind by an unmatched glob.
        [ -f "$conf_file" ] || continue

        if parsed_value=$(crudini --get "$conf_file" DEFAULT host 2>/dev/null); then
            NOVA_HOST="$parsed_value"
        fi
    done

    # With a DNS domain configured the short node name (everything before
    # the first dot) is expected, otherwise the full "uname -n" output.
    node_name=$(uname -n)
    if [ -n "${OCF_RESKEY_domain}" ]; then
        expected_host=${node_name%%.*}
    else
        expected_host=$node_name
    fi

    if [ -z "$NOVA_HOST" ]; then
        # Nothing configured: a calculated value needs no further checking.
        NOVA_HOST=$expected_host
    elif [ "x$NOVA_HOST" != "x$expected_host" ]; then
        ocf_exit_reason "Invalid Nova host name, must be ${expected_host} in order for instance recovery to function"
        rc=$OCF_ERR_CONFIGURED
    fi

    if [ "$rc" != "$OCF_SUCCESS" ]; then
        exit $rc
    fi
    return $rc
}

# Default for the evacuation_delay parameter; applied only when the
# variable is not set at all.
: "${OCF_RESKEY_evacuation_delay=120}"

# Action dispatch.  meta-data and usage/help exit immediately.  start and
# monitor run nova_validate first (it sets NOVA_HOST and fence_options);
# stop does not validate.  validate-all currently reports success without
# calling nova_validate.
case "$__OCF_ACTION" in
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
    usage|help)
        nova_usage
        exit $OCF_SUCCESS
        ;;
    start)
        nova_validate
        nova_start
        ;;
    stop)
        nova_stop
        ;;
    monitor)
        nova_validate
        nova_monitor
        ;;
    notify)
        nova_notify
        ;;
    validate-all)
        exit $OCF_SUCCESS
        ;;
    *)
        nova_usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac

# Status of the last command run by the selected action.
rc=$?
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
exit $rc
