#!/bin/bash
#
# LBNL Node Health Check -- Node Offlining Helper
#
# Michael Jennings <mej@lbl.gov>
# 16 September 2011
#

# This script is a simple wrapper around the resource manager's node
# administration command (pbsnodes, scontrol, or badmin) that the node
# health check can run in the background to mark nodes offline.  It will
# first obtain the current node state information to avoid overwriting
# notes which were not placed by NHC.  If these checks pass, the node is
# marked offline with the note supplied.

# IGNORE_EMPTY_NOTE may be supplied by the environment; it defaults to 0.
# When it is 1, a node that is already offline without any note is no
# longer left untouched by the branches below.
: "${IGNORE_EMPTY_NOTE:=0}"
# First word of every note written by NHC; used to recognize our own notes.
LEADER="NHC:"

# Log a timestamped record of this invocation and its arguments.
printf '%s %s %s\n' "$(date '+%Y%m%d %H:%M:%S')" "$0" "$*"

# The first argument is the node to offline; all remaining arguments,
# joined by single spaces, form the note text.
NOTE="${*:2}"
HOSTNAME="$1"
shift

### PBS (TORQUE)
if [[ "$NHC_RM" == "pbs" ]]; then
    # Command and argument defaults; each may be overridden from the environment.
    PBSNODES="${PBSNODES:-pbsnodes}"
    PBSNODES_LIST_ARGS="${PBSNODES_LIST_ARGS:--n -l all}"
    PBSNODES_OFFLINE_ARGS="${PBSNODES_OFFLINE_ARGS:--o -N}"

    # Split the node listing into words:  word 1 is used as the node state,
    # word 2 as the first word ("leader") of the existing note, and the
    # remaining words as the rest of that note.  Pathname expansion is turned
    # off while splitting so that words containing *, ? or [ are kept
    # literally instead of being replaced by matching file names from the
    # current directory.
    set -f
    LINE=( $($PBSNODES $PBSNODES_LIST_ARGS $HOSTNAME) )
    set +f
    STATUS="${LINE[1]}"
    OLD_NOTE_LEADER="${LINE[2]}"
    OLD_NOTE="${LINE[*]:3}"
    case "$STATUS" in
        *down*|*offline*|*unknown*)
            if [[ "${STATUS/offline}" != "${STATUS}" ]]; then
                # If the node is already offline, and there is no old note, and
                # we've not been told to ignore that, do not touch the node.
                if [[ -z "$OLD_NOTE_LEADER" && "$IGNORE_EMPTY_NOTE" != "1" ]]; then
                    echo "$0:  Not offlining $HOSTNAME:  Already offline with no note set."
                    exit 0
                fi
            fi
            # If there's an old note that wasn't set by NHC, preserve it.
            if [[ -n "$OLD_NOTE_LEADER" && "$OLD_NOTE_LEADER" != "$LEADER" ]]; then
                LEADER="$OLD_NOTE_LEADER"
                NOTE="$OLD_NOTE"
            fi
            ;;
    esac

    # Any state not matched above is offlined with the NHC note as supplied.
    # exec replaces this shell, so nothing after this line runs on success.
    echo "$0:  Marking $STATUS $HOSTNAME offline:  $LEADER $NOTE"
    exec $PBSNODES $PBSNODES_OFFLINE_ARGS "$LEADER $NOTE" $HOSTNAME

### Slurm
elif [[ "$NHC_RM" == "slurm" ]]; then
    # Command and argument defaults; each may be overridden from the environment.
    SLURM_SINFO="${SLURM_SINFO:-sinfo}"
    SLURM_SCONTROL="${SLURM_SCONTROL:-scontrol}"
    SLURM_SC_OFFLINE_ARGS="${SLURM_SC_OFFLINE_ARGS:-update State=DRAIN}"
    # HOSTNAME_S is not assigned anywhere in this script, so it has to come
    # from the environment (presumably the short node name exported by the
    # caller -- TODO confirm).  When it is missing, fall back to the short form
    # of the hostname argument so this branch still acts on the requested node.
    HOSTNAME_S="${HOSTNAME_S:-${HOSTNAME%%.*}}"

    # Split the sinfo output into words:  word 0 is used as the node state,
    # word 1 as the first word ("leader") of the existing reason, and the
    # remaining words as the rest of that reason.  Pathname expansion is
    # turned off while splitting so that words containing *, ? or [ (for
    # example a state such as "idle*") are kept literally instead of being
    # replaced by matching file names from the current directory.
    set -f
    LINE=( $($SLURM_SINFO -o '%t %E' -hn ${HOSTNAME_S}) )
    set +f
    STATUS="${LINE[0]}"
    OLD_NOTE_LEADER="${LINE[1]}"
    OLD_NOTE="${LINE[*]:2}"
    # REBOOT_ASAP is 1 only when the existing reason is exactly `Reboot ASAP`;
    # both words must match, so reasons such as "Fix ASAP" or "Reboot pending"
    # are treated as ordinary notes.
    REBOOT_ASAP=0
    if [[ "$OLD_NOTE_LEADER" == "Reboot" && "$OLD_NOTE" == "ASAP" ]]; then
        REBOOT_ASAP=1
    fi
    case "$STATUS" in
        *'@'*|*'#'*|*-*)
            # These states aren't handled yet.
            echo "$0:  State \"$STATUS\" not yet handled; ignoring."
            exit 0
            ;;
        # Node states: src/common/slurm_protocol_defs.c --> node_state_string()
        alloc*|boot*|comp*|drain*|drng*|fail*|idle*|maint*|mix*|plnd*|resume*|resv*|undrain*)
            case "$STATUS" in
                drain*|drng*|fail*|maint*)
                    # If the node is already offline, and there is no old note, and
                    # we've not been told to ignore that, do not touch the node.
                    if [[ "$OLD_NOTE_LEADER" == "none" && "$IGNORE_EMPTY_NOTE" != "1" ]]; then
                        echo "$0:  Not offlining ${HOSTNAME_S}:  Already offline with no note set."
                        exit 0
                    fi
                    if (( REBOOT_ASAP )); then
                        echo "$0:  Not offlining ${HOSTNAME_S}:  Pending reboot."
                        exit 0
                    fi
                    ;;
                boot*)
                    # Offline node after reboot if vanilla `scontrol reboot` was
                    # called, so jobs can't run until NHC onlines the node.
                    # Note: This won't happen while node is waiting to boot,
                    # because $STATUS would show MIX@ or ALLOC@, not BOOT.
                    # See src/common/slurm_protocol_defs.c-->node_state_string()
                    SHOW_NODE_OUTPUT="$($SLURM_SCONTROL show node ${HOSTNAME_S})"
                    if [[ $SHOW_NODE_OUTPUT == *"State=REBOOT"* ]]; then
                        MSG="Temporarily offlining ${HOSTNAME_S} after reboot until NHC can online it"
                        echo "$0:  $MSG"
                        $SLURM_SCONTROL update State=DRAIN NodeName=${HOSTNAME_S} Reason="$LEADER $MSG"
                        exit 0
                    fi

                    # If `Reboot ASAP` has been cleared, then the node is
                    # already set to stay in DRAIN until NHC onlines it, so exit
                    if (( ! REBOOT_ASAP )); then
                        echo "$0:  ${HOSTNAME_S} already set to remain offline after reboot until NHC onlines it"
                        exit 0
                    fi
                    ;;
                *"*")
                    # State carries a trailing "*".  Skip the node if this
                    # host has been up for less than 600 seconds.
                    UPTIME=( $(cat /proc/uptime) )
                    UPTIME_SEC="${UPTIME[0]%.*}"
                    if (( UPTIME_SEC < 600 )); then
                        echo "$0: Node not responding, unexpectedly rebooted, ignoring"
                        exit 0
                    fi
                    ;;
            esac
            # `scontrol reboot asap` will set the node state to REBOOT+DRAIN and
            # reason to `Reboot ASAP`. Then, after boot, and after NHC runs
            # once, Slurm will set the node base state to IDLE. If reason ==
            # `Reboot ASAP`, Slurm will also clear the DRAIN flag. We want
            # NHC to clear the DRAIN flag, not Slurm, so delete the
            # `Reboot ASAP` reason by not preserving it below.
            # See https://slurm.schedmd.com/scontrol.html --> reboot

            # If there's an old note that wasn't set by NHC, preserve it.  An
            # empty leader, "none", and `Reboot ASAP` do not count as notes.
            if [[ -n "$OLD_NOTE_LEADER" && "$OLD_NOTE_LEADER" != "none" && "$OLD_NOTE_LEADER" != "$LEADER" ]] && (( ! REBOOT_ASAP )); then
                LEADER="$OLD_NOTE_LEADER"
                NOTE="$OLD_NOTE"
            fi
            # exec replaces this shell, so nothing after this line runs on success.
            echo "$0:  Marking $STATUS ${HOSTNAME_S} offline:  $LEADER $NOTE"
            exec $SLURM_SCONTROL $SLURM_SC_OFFLINE_ARGS NodeName=${HOSTNAME_S} Reason="$LEADER $NOTE"
            ;;
        down*)
            echo "$0:  Not changing state of down node ${HOSTNAME_S}."
            ;;
        *)  echo "$0:  Not sure how to handle node state \"$STATUS\" on ${HOSTNAME_S}" ;;
    esac

### IBM Platform LSF
elif [[ "$NHC_RM" == "lsf" ]]; then
    # Command and argument defaults; each may be overridden from the environment.
    LSF_BHOSTS="${LSF_BHOSTS:-bhosts}"
    LSF_BADMIN="${LSF_BADMIN:-badmin}"
    LSF_OFFLINE_ARGS="${LSF_OFFLINE_ARGS:-hclose -C}"

    STATUS=""
    OLD_NOTE_LEADER=""
    OLD_NOTE=""
    # Run bhosts while IFS still has its default value, so that an LSF_BHOSTS
    # override containing arguments is split into separate words.
    BHOSTS_OUTPUT="$($LSF_BHOSTS -l $HOSTNAME)"
    # Pathname expansion stays off for all of the word splitting below so that
    # words containing *, ? or [ are kept literally instead of being replaced
    # by matching file names from the current directory.
    set -f
    # One array element per non-empty output line.  The array is deliberately
    # not named LINES, which is a variable bash itself may assign.
    IFS=$'\n'
    BHOSTS_LINES=( $BHOSTS_OUTPUT )
    IFS=$' \t\n'
    for (( i = 0; i < ${#BHOSTS_LINES[*]}; i++ )); do
        LINE=( ${BHOSTS_LINES[$i]} )
        if [[ "${LINE[0]}" == "STATUS" ]]; then
            # The line following the one that starts with "STATUS" carries
            # the values; its first word is taken as the host status.
            ((i++))
            LINE=( ${BHOSTS_LINES[$i]} )
            STATUS="${LINE[0]}"
        elif [[ "${LINE[0]}" == "ADMIN" && "${LINE[2]}" == "COMMENT:" ]]; then
            # Word 3 onward is the comment text:  drop the first double quote
            # from its first word and a trailing double quote from the rest.
            OLD_NOTE_LEADER="${LINE[3]/\"}"
            OLD_NOTE="${LINE[*]:4}"
            OLD_NOTE="${OLD_NOTE/%\"}"
            break
        fi
    done
    set +f
    case "$STATUS" in
        ok|closed*)
            if [[ "$STATUS" == "closed_Adm" ]]; then
                # If the node is already offline, and there is no old note, and
                # we've not been told to ignore that, do not touch the node.
                if [[ -z "$OLD_NOTE_LEADER" && "$IGNORE_EMPTY_NOTE" != "1" ]]; then
                    echo "$0:  Not offlining $HOSTNAME:  Already offline with no note set."
                    exit 0
                fi
            fi
            # If there's an old note that wasn't set by NHC, preserve it.
            if [[ -n "$OLD_NOTE_LEADER" && "$OLD_NOTE_LEADER" != "$LEADER" ]]; then
                LEADER="$OLD_NOTE_LEADER"
                NOTE="$OLD_NOTE"
            fi
            # exec replaces this shell, so nothing after this line runs on success.
            echo "$0:  Marking $STATUS $HOSTNAME offline:  $LEADER $NOTE"
            exec $LSF_BADMIN $LSF_OFFLINE_ARGS "$LEADER $NOTE" $HOSTNAME
            ;;
    esac

### Sun Grid Engine (and variants)
elif [[ "$NHC_RM" == "sge" ]]; then
    # Nothing to do for this resource manager beyond reporting that fact.
    printf '%s:  No additional node marking necessary for SGE and variants.\n' "$0"

### Everything else is unsupported.
else
    # Report the unrecognized NHC_RM value and fail.  255 is the exit status
    # bash produces for "exit -1".
    printf '%s:  Unsupported RM detected in %s:  "%s"\n' "$0" "$0" "$NHC_RM"
    exit 255
fi
exit 0
