#!/bin/sh

# Name: pestat
# ------------
# Torque resource manager utility script: Print a 1-line summary of jobs on each node.
# Usage: Run "pestat -h" for help information.
#
# Colors may be used in the output (also controlled by the PESTAT_COLOR environment variable).
# The printout at the end may be customized if needed.
#
# Netload information:
# --------------------
# The Torque pbs_mom records network load information as the sum of transmit+receive
# of all interfaces.
# The "netload" information is defined in the source file ./src/resmom/linux/mom_mach.c
# as the sum of bytes on all network interfaces since boot time, read from /proc/net/dev.
# The pestat command (from version 2.9) prints delta-netload information when run twice
# with some time interval in between. The file $NETLOADFILE stores recorded information.
#
# The baseline netload information may be generated from cron, say, every 10 minutes
# by this crontab entry:
# */10 * * * * /usr/bin/pestat -C > /dev/null
# Otherwise the default netload file is the user-specific file netload.$USER.
# Subsequent pestat commands will use the baseline netload.

# Author: Ole.H.Nielsen@fysik.dtu.dk
# URL: ftp://ftp.fysik.dtu.dk/pub/Torque/pestat
VERSION="pestat version 2.13.  Date: 6 February 2013"

# Locations of the Torque commands and of awk (absolute paths).
# Adjust these if the tools are installed elsewhere at your site.
PBSNODES=/usr/bin/pbsnodes
PBSCONFIG=/usr/bin/pbs-config
QSTAT=/usr/bin/qstat
AWK=/bin/awk

# Node names have different lengths at different sites,
# so configure this printf string to accommodate your longest node name + 1 (>= 5 chars)
NODENAMEFORMAT="%-20s"

# The pestat status directory (must be secure location for root)
PESTAT_LIBDIR=/var/lib/pestat
# Baseline netload file written by "pestat -C" (intended to be run from cron)
NETLOAD_CRON=$PESTAT_LIBDIR/netload.cron
# Create the status directory when running as root.
# The effective user id is tested rather than $USER, because $USER may be unset
# or stale (for example in a cron environment or after "su"), in which case the
# directory would never be created.
if [ ! -d "$PESTAT_LIBDIR" ] && [ "$(id -u)" -eq 0 ]
then
	mkdir -v "$PESTAT_LIBDIR"
fi

# Minimum age of NETLOADFILE (seconds): If less than this value the netload information may not be reliable:
NETLOADFILE_MINAGE=10

# usage: Print the command synopsis and the option descriptions on stdout.
# Reads:  $0 (script name as invoked) and $NETLOAD_CRON (shown in the -C help text).
# The portable "name()" definition syntax is used, because the "function" keyword
# is not part of POSIX sh and this script runs under #!/bin/sh.
usage()
{
# Unquoted here-document: $0 and $NETLOAD_CRON are expanded, a plain * is not.
cat <<EOF
Usage: $0 [-f] [-c|-n] [-d] [-V] [-u username|-g groupname] [-j jobs] [-C] [-h]
where:
	-f: Listing only nodes that are flagged by *
	-d: Listing also nodes that are down
	-c/-n: Color/no color output
	-u username: Print only user <username> (do not use with the -g flag)
	-g groupname: Print only users in group <groupname>
	-j jobs: List only nodes with at least <jobs> running jobs
	-C: Use with cron: Netload file will be saved as $NETLOAD_CRON
	-h: Print this help information
	-V: Version information
EOF
}

#
# Netload information from Torque pbs_mom will be printed
#
netloadprint=1
# Select the file that holds the previously recorded netload values:
if [ -s "$NETLOAD_CRON" ]
then
	# A non-empty baseline file $NETLOAD_CRON exists (generated by a cron-job):
	# use it read-only, i.e. it is not rewritten unless the -C flag is given
	NETLOADFILE=$NETLOAD_CRON
	NETLOADWRITE=0
else
	# Default: Per-user netload file, rewritten on every run.
	# Fall back to $LOGNAME or "id -un" when $USER is unset, so that the name
	# never degenerates to "/var/tmp/netload." shared by all such callers.
	# NOTE(review): the name is predictable and /var/tmp is usually world-writable.
	NETLOADFILE=/var/tmp/netload.${USER:-${LOGNAME:-$(id -un)}}
	NETLOADWRITE=1
fi

# Divisor applied to the recorded network byte counts.
# With Linux port bonding Torque counts every byte twice (bond0 plus the ethX
# devices); in that case replace the default below by:
# NETLOADSCALE=2
NETLOADSCALE=1

# Nodes with a netload above this many Mbit/s get flagged.
# 2000 = full-duplex Gigabit Ethernet (2*1000 Mbit/s); we have dual-Gigabit Ethernet.
NETLOADTHRES=2000

#
# Default parameter values
#
# Down nodes are kept out of the flagged list unless requested
# ("pbsnodes -l" lists the down nodes).
listdownnodes=0

# Lower limit on the number of running jobs for a node to be listed.
# Handy for picking out nodes that run multiple jobs; 0 lists all nodes.
minjobs=0

# Colors are on by default; the user turns them off with PESTAT_COLOR=0
colors=1
case "$PESTAT_COLOR" in
	0 )	colors=0 ;;
esac

# No colors either when stdout is not a terminal (the "-c" flag can overrule this).
FD=1	# File Descriptor no. 1 = stdout
[ -t "$FD" ] || colors=0

#
# Process command arguments
#
listflagged=0

# parse_options: Process the command line options passed as arguments.
# Sets (globals): listflagged, listdownnodes, colors, username, groupname, minjobs,
#                 NETLOADFILE, NETLOADWRITE; OPTIND is left pointing past the last option.
# Calls:  usage (for -h and in the error cases).
# Exits:  0 after -V or -h;
#         1 on an unknown option, a missing option argument or a non-numeric -j value.
parse_options()
{
	while getopts "fdcnVu:g:j:Ch" options; do
		case $options in
			f )	listflagged=1
				echo "Listing only nodes that are flagged by *"
				;;
			d )	listdownnodes=1
				# Listing also down nodes
				;;
			c )	colors=1
				# Force the use of colors in output
				;;
			n )	colors=0
				# Do not use colors in output
				;;
			u )	username=$OPTARG
				echo "Select only user $username"
				;;
			g )	groupname=$OPTARG
				echo "Select only users in group $groupname"
				;;
			j )	# minjobs is compared numerically with the job count later on,
				# so anything but a string of digits is rejected here
				case $OPTARG in
					''|*[!0-9]* )
						echo "ERROR: The -j argument must be a non-negative integer: $OPTARG"
						usage
						exit 1
						;;
				esac
				minjobs=$OPTARG
				echo "List only nodes with at least $minjobs running jobs"
				;;
			C )	# Cron mode: (re)write the baseline netload file
				NETLOADFILE=$NETLOAD_CRON
				NETLOADWRITE=1
				;;
			V )	echo "$VERSION"
				exit 0
				;;
			h )	# Help was asked for explicitly: not an error
				usage
				exit 0
				;;
			* )	# getopts returns "?" for unknown options and missing arguments
				usage
				exit 1
				;;
		esac
	done
}
parse_options "$@"

# Test for extraneous command line arguments:
# after option parsing OPTIND is the index of the first non-option argument,
# so more than OPTIND-1 arguments means that something was left over.
if [ "$#" -gt "$((OPTIND - 1))" ]
then
	# "$*" is quoted so that the arguments are echoed literally (no glob expansion)
	echo "ERROR: Too many command line arguments: $*"
	usage
	exit 1
fi

# The user (-u) and group (-g) selections are mutually exclusive
if [ -n "$username" ] && [ -n "$groupname" ]
then
	echo "ERROR: Do not select both username and groupname"
	usage
	exit 1
fi

# Check the recorded netload file: set netloadage (seconds since it was written)
# or turn netload printing off (netloadprint=0) when there is no usable file.
if [ -s "$NETLOADFILE" ]
then
	# A single stat call (GNU "stat -c" syntax) returns the modification time
	# both as seconds since the epoch (%Y) and in human-readable form (%y)
	if ! fileinfo=$(stat -c "%Y %y" "$NETLOADFILE")
	then
		echo "ERROR: Cannot determine the age of netload file $NETLOADFILE" >&2
		exit 1
	fi
	filetime=${fileinfo%% *}	# First word: epoch seconds
	filedate=${fileinfo#* }		# Remainder: date, time, fraction and time zone
	filedate=${filedate%%.*}	# Strip the fractional seconds and the time zone
	now=$(date "+%s")
	# NETLOADFILE file age in seconds:
	netloadage=$((now - filetime))
	echo "Netload file $NETLOADFILE age: $netloadage seconds, dated $filedate"
	if [ "$netloadage" -lt "$NETLOADFILE_MINAGE" ]
	then
		echo "Netload file age should be at least $NETLOADFILE_MINAGE seconds, please try again..."
		exit 0
	fi
else
	echo "Netload file $NETLOADFILE does not exist: Cannot print netload until next time."
	echo "You have to run this command again after some time in order to calculate current network load."
	netloadprint=0
fi

#
# Heading for printout showing:
#
# node:		Node hostname
# state:	Torque state
# load:		CPU load average
# pmem:		Physical memory
# ncpu:		Number of CPUs
# mem:		Physical+virtual memory
# resi:		Resident (used) memory
# usrs:		Number of sessions / Number of users
# NetMbit:	Network load in Mbit/s since the netload file was recorded
# tasks:		Number of tasks started by Torque on the node
# jobids/users:	Jobids and corresponding usernames of Torque jobs on this node

#
# Show the Torque node status and parse the results
#
# All shell settings are handed to awk as quoted -v assignments, so that an empty
# value or a value containing blanks cannot shift the awk argument list.
"$PBSNODES" -a | "$AWK" -v "listflagged=$listflagged" -v "listdownnodes=$listdownnodes" \
	-v "colors=$colors" -v "username=$username" -v "groupname=$groupname" -v "minjobs=$minjobs" \
	-v "NODENAMEFORMAT=$NODENAMEFORMAT" \
	-v "QSTAT=$QSTAT" -v "PBSCONFIG=$PBSCONFIG" \
	-v "netloadprint=$netloadprint" -v "NETLOADFILE=$NETLOADFILE" -v "NETLOADWRITE=$NETLOADWRITE" \
	-v "netloadage=$netloadage" -v "NETLOADTHRES=$NETLOADTHRES" -v "NETLOADSCALE=$NETLOADSCALE" '
# Initialization before the pbsnodes output is read:
# colors, Torque version, jobid->user map from qstat, old netload data,
# validation of the selected user or group, and the header line.
BEGIN {
	# Define terminal colors for the output if requested
	# (otherwise RED, GREEN and NORMAL stay empty and print nothing)
	if (colors != 0) {
		# See http://en.wikipedia.org/wiki/ANSI_escape_code#Colors
		RED="\033[1;31m"
		GREEN="\033[1;32m"
		NORMAL="\033[0m"
	}
	# Conversion factor of netload (bytes) to Mbit/sec
	MBITSEC = 1000000*NETLOADSCALE/8
	# Get the Torque version (thanks to Stefan Becuwe <stefan.becuwe@ua.ac.be>)
	# since 2.5.x, output of array jobs in qstat has changed
	# < 2.5    jobid-arrayid ...  (one line per task)
	# >= 2.5   jobid[]            (just one line)
	pbsmajor = 2
	pbsminor = 5
	PBSVERSION = PBSCONFIG " --version"
	while ((PBSVERSION | getline) > 0) {
		split($1, a, ".")
	}
	close(PBSVERSION)
	# Force numeric values (empty or missing fields count as 0)
	vmajor = a[1] + 0
	vminor = a[2] + 0
	# The minor number only matters when the major numbers are equal,
	# so that e.g. 3.0 and 4.2 are recognized as newer than 2.5
	if ( vmajor > pbsmajor || (vmajor == pbsmajor && vminor >= pbsminor) ) {
		recentpbs = 1
	} else {
		recentpbs = 0
	}

	# Get the list of jobids versus usernames from qstat
	QSTAT = QSTAT " -r"			# Append -r flag (running jobs) to qstat.
	while ((QSTAT | getline) > 0) {		# Parse lines from qstat -r
		if (++line>5) {			# Skip first 5 header lines
			split($1,b,".")		# Jobid is b[1]
			jobuser[b[1]] = $2	# Username of this jobid
		}
	}
	close(QSTAT)

	if (netloadprint == 1) {
		# Read previously recorded netload file (format: nodename network-bytes list-of-jobids)
		while ((getline < NETLOADFILE) > 0) {
			oldnetload[$1] = $2
			# Rebuild the jobid list in the same " id id ..." form that is compared later
			for (i=3; i<=NF; i++) oldjobidlist[$1] = oldjobidlist[$1] " " $i
		}
		if (NETLOADWRITE == 1) {
			# Truncate NETLOADFILE
			close(NETLOADFILE)
			print "" > NETLOADFILE
		}
	}

	if (username != "") {
		userselect=1			# We select only this username
		userfound=0
		# Check that the username exists in the passwd database.
		# getline returns -1 on error, so test for > 0 to avoid an endless loop.
		GETENT = "getent passwd"
		while ((GETENT | getline) > 0) {
			split($0,b,":")         # Split password line into fields
			if (username == b[1]) userfound=1
		}
		close(GETENT)
		if (userfound != 1) {
			print RED "ERROR: No such username:" NORMAL, username
			exit 1
		}
	} else if (groupname != "") {
		groupselect=1			# We have to select users in this groupname
		groupfound=0
		# Get the list of group names
		GETENT = "getent group"
		while ((GETENT | getline) > 0) {
			split($0,b,":")         # Split group line into fields
			group[b[3]] = b[1]      # Group name b[1] of this GID (b[3])
			if (groupname == b[1]) groupfound=1
		}
		close(GETENT)
		if (groupfound != 1) {
			print RED "ERROR: No such groupname:" NORMAL, groupname
			exit 1
		}
		# Map every username to the name of its primary group
		GETENT = "getent passwd"
		while ((GETENT | getline) > 0) {
			split($0,b,":")         # Split password line into fields
			gidname[b[1]] = group[b[4]]   # Group name of this GID (numeric group id)
		}
		close(GETENT)
	}

	# Print a header line
	printf(NODENAMEFORMAT, "Node")
	print "state  load    pmem ncpu   mem   resi usrs tasks NetMbit jobids/users"
}
#
# Parse the output of pbsnodes
#
# A line with a single field is taken as the start of a node record (the node name).
# The following lines with at least 3 fields ("key = value") are consumed here,
# then the node is checked against the selections and printed as one line.
NF==1 {	node=$1				# 1st line is nodename
	nodename[node] = node		# Node name
	getline				# Get the next input line
	numjobs[node] = 0               # Torque jobs on the node
	numtasks[node] = 0              # Number of tasks started by Torque on the node
	listnode=0			# Set to > 0 if this node gets flagged
	userusesnode=0			# If this node is used by the selected user
	groupusesnode=0			# If this node is used by a user in the selected group
	while (NF >= 3) {		# Read a number of non-blank lines
		if ($1 == "state") {
			# Map the Torque state list to a 4-character code
			# (a state list not covered below leaves the code empty)
			if ($3 == "job-exclusive")			state[node] = "excl"
			else if ($3 == "job-exclusive,busy")		state[node] = "busy"
			else if ($3 == "busy")				state[node] = "busy"
			else if ($3 == "free")				state[node] = "free"
			else if ($3 == "offline")			state[node] = "offl"
			else if ($3 == "offline,job-exclusive")		state[node] = "offl"
			else if ($3 == "offline,job-exclusive,busy")	state[node] = "offl"
			else if ($3 == "offline,busy")			state[node] = "offl"
			else if ($3 == "down")				state[node] = "down"
			else if ($3 == "down,offline")			state[node] = "down"
			else if ($3 == "down,job-exclusive")		state[node] = "down"
			else if ($3 == "down,offline,job-exclusive")	state[node] = "down"
			else if ($3 == "down,offline,busy")		state[node] = "down"
			else if ($3 == "down,offline,job-exclusive,busy")	state[node] = "down"
			else if ($3 == "UNKN")				state[node] = "UNKN"
		}
		else if ($1 == "np")		np[node] = $3
		else if ($1 == "properties")	properties[node] = $3
		else if ($1 == "ntype")		ntype[node] = $3
		else if ($1 == "jobs")		numtasks[node] = NF - 2	# One field per task after "jobs ="
		else if ($1 == "status") {
			# Get the node status subfields
			split (substr($0,15), a, ",")	# Remove leading "status =", split subfields separated by ","
			for (field in a) {		# Process individual status subfields
				split(a[field],b,"=")	# Split var=value fields
				if (b[1]=="arch")		arch[node]=b[2]
				else if (b[1]=="opsys")		opsys[node]=b[2]
				else if (b[1]=="sessions")	sessions[node]=b[2]
				else if (b[1]=="nsessions")	nsessions[node]=int(b[2])
				else if (b[1]=="nusers")	nusers[node]=b[2]
				else if (b[1]=="idletime")	idletime[node]=b[2]
				else if (b[1]=="totmem")	totmem[node]=b[2]
				else if (b[1]=="availmem")	availmem[node]=b[2]
				else if (b[1]=="physmem")	physmem[node]=b[2]
				else if (b[1]=="ncpus")		ncpus[node]=b[2]
				else if (b[1]=="loadave")	loadave[node]=b[2]
				else if (b[1]=="netload")	netload[node]=b[2]
				else if (b[1]=="size")		size[node]=b[2]
				else if (b[1]=="jobs") {
					# Get the list of jobids/users for this node
					if (b[2] == "? 0") b[2] = ""	# Fix for a bug in pbsnodes ?
					numjobs[node]=split(b[2],c)
					for (i=1; i <= numjobs[node]; i++) {
						split(c[i], d, ".")
						# Get jobid and username
						jobid = d[1]
						tjobid = jobid
						if (recentpbs == 1) {	# For Torque version >= 2.5
							# qstat lists an array job as jobid[], so drop the task index
							sub(/\[[0-9]+\]/, "[]", tjobid)
						}
						user = jobuser[tjobid]
						# Case where the node pbs_mom has a (dead job) jobid unknown to pbs_server:
						if (length(user) == 0) {	# Flag non-existent username
							user="NONE*"
							usercolor=RED
							listnode++
						} else
							usercolor=NORMAL
						# Append jobid and username to the job list
						jobiduserlist[node] = jobiduserlist[node] " " usercolor jobid " " user NORMAL
						jobidlist[node] = jobidlist[node] " " jobid
						# If this node is used by the selected user
						if (userselect==1 && user == username) userusesnode=1
						# If this node is used by a user in the selected group
						if (groupselect==1 && gidname[user] == groupname) groupusesnode=1
					}
				} else if (b[1]=="rectime")	rectime[node]=b[2]
			}
		}
		# Get the next input line; at end of input (or on error) the current line
		# would stay unchanged, so leave the loop instead of spinning forever
		if ((getline) <= 0) break
	}

	if (NETLOADWRITE == 1) {
		# Save netload information to file (format: nodename network-bytes list-of-jobids)
		print nodename[node], netload[node], jobidlist[node] >> NETLOADFILE
	}
	if (netloadprint == 1 && oldnetload[node] > 0) {
		netloadflag=" "
		# Calculate delta-netload
		netload[node] = netload[node] - oldnetload[node]
		if (netload[node] < 0) {
			netload[node] = -1	# Negative values are bad (could be due to recent node reboot)
			netloadflag="*"
		}
		netmbit=netload[node]/(netloadage*MBITSEC)	# Convert netload to Mbit/sec
		if (netmbit > NETLOADTHRES) {
			netmbitcolor=RED	# Netload > NETLOADTHRES is flagged
			listnode++
		} else {
			netmbitcolor=NORMAL
		}
		if (jobidlist[node] != oldjobidlist[node]) {
			netloadflag="!"		# List of jobids has changed: flag this
			# print "Old: ", oldjobidlist[node], "Current: ", jobidlist[node]
		}
	} else {
		netload[node] = 0		# No data available
		netloadflag="*"
		# Reset the printed values too, otherwise the Mbit/s value and color
		# of the previously processed node would be shown for this node
		netmbit=0
		netmbitcolor=NORMAL
	}

	# Is this node used by the selected user? Otherwise skip printout.
	if (userselect==1 && userusesnode==0) next
	# Is this node used by a user in the selected group? Otherwise skip printout.
	if (groupselect==1 && groupusesnode==0) next
	# If this node runs less than minjobs jobs
	if (numjobs[node] < minjobs) next

	# Print out values that we are interested in.  Flag unexpected values with a "*".

	# Flag nodes with status busy, down, offline or unknown
	if (state[node] == "busy" || state[node] == "down" || state[node] == "offl" || state[node] == "UNKN") {
		stateflag="*"
		statecolor=RED
		listnode++
	} else {
		stateflag=" "
		statecolor=NORMAL
	}

	# Flag unexpected CPU load average:
	# the load should be within 0.5 of the number of tasks on the node
	loaddiff = loadave[node] - numtasks[node]
	if (loaddiff > 0.5 || loaddiff < -0.5) {
		loadflag="*"
		loadcolor=RED
		cpucolor=GREEN
		listnode++
	} else {
		loadflag=" "
		loadcolor=NORMAL
		cpucolor=NORMAL
	}

	# Remove "kb" unit from memory sizes
	sub("kb", "", totmem[node])
	sub("kb", "", availmem[node])
	sub("kb", "", physmem[node])
	# Resident memory (total minus available, divided by 1024)
	resi = (totmem[node]-availmem[node])/1024
	if (resi > 50 && resi > physmem[node]/1024 - 50) {	# High memory usage
		resiflag="*"
		resicolor=RED
		pmemcolor=GREEN
		listnode++
	} else {
		resiflag=" "
		resicolor=NORMAL
		pmemcolor=NORMAL
	}

	# Flag unexpected number of processes or users
	if (nsessions[node] > 2*ncpus[node] + 1) {	# More than 2 sessions per job
		sessflag="*"
		sesscolor=RED
		listnode++
	} else if (nusers[node] > ncpus[node]) {	# More users than nCPUs is bad
		sessflag="*"
		sesscolor=RED
		listnode++
	} else {
		sessflag=" "
		sesscolor=NORMAL
	}

	# Flag unexpected number of jobs
	if (numjobs[node] > numtasks[node]) {		# Should be at least 1 task per job
		jobflag="*"
		jobcolor=RED
		listnode++
	} else {
		jobflag=" "
		jobcolor=NORMAL
	}

	# Listing of down nodes? Unless requested, a down node is never treated as flagged.
	if (listdownnodes == 0 && state[node] == "down") listnode=0

	# Print a 1-line list for this node
	# (with -f only the nodes that collected at least one flag)
	if (!listflagged || listnode > 0) {
		printf (NODENAMEFORMAT, node)
		printf (" %s%s%1s%s", statecolor, state[node], stateflag, NORMAL)
		printf (" %s%4.2g%1s%s", loadcolor, loadave[node], loadflag, NORMAL)
		printf (" %s%6d%s", pmemcolor, physmem[node]/1024, NORMAL)
		printf (" %s%3d%s", cpucolor, ncpus[node], NORMAL)
		printf (" %6d", totmem[node]/1024)
		printf (" %s%6d%1s%s", resicolor, resi, resiflag, NORMAL)
		printf (" %s%1d/%1d%1s%s", sesscolor, nsessions[node], nusers[node], sessflag, NORMAL)
		printf (" %s%3d%1s%s", jobcolor, numtasks[node], jobflag, NORMAL)
		if (netloadprint == 1) {
			printf (" %s%5d%1s%s", netmbitcolor, netmbit, netloadflag, NORMAL)
		} else
			printf ("     -  ")
		printf ("  %s\n", jobiduserlist[node])
	}
}'
