# /usr/lib/sleha-functions
# Supporting functions for /usr/sbin/sleha-{init,join}
#
# Copyright (c) 2010-2012 Novell Inc. All Rights Reserved.
#
# Author: Tim Serong <tserong@suse.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like.  Any license provided herein, whether implied or
# otherwise, applies only to this software file.  Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#

# Shell-quoted copy of the command line; log_start() writes it to the log
declare -r INVOCATION=$(printf '%q ' "$0" "$@")
# File that log() and invoke() append to
declare -r LOG_FILE=/var/log/sleha-bootstrap.log
# Configuration file paths.  Only COROSYNC_CONF and the two SuSEfirewall2
# paths are referenced in this file; the others are presumably used by
# the scripts that source it (TODO confirm).
declare -r CSYNC2_KEY=/etc/csync2/key_hagroup
declare -r CSYNC2_CFG=/etc/csync2/csync2.cfg
declare -r COROSYNC_CONF=/etc/corosync/corosync.conf
declare -r SYSCONFIG_SBD=/etc/sysconfig/sbd
declare -r SYSCONFIG_FW=/etc/sysconfig/SuSEfirewall2
declare -r SYSCONFIG_FW_CLUSTER=/etc/sysconfig/SuSEfirewall2.d/services/cluster

# When true, status(), status_long() and status_done() print nothing
declare BE_QUIET=false
# When true, confirm() and prompt_for_string() answer without prompting
declare YES_TO_ALL=false
# Interface whose address init_network() tries to detect
declare NET_IF=eth0
# Filled in by init_network() unless they already have a value
declare IP_ADDRESS
declare IP_NETWORK

# Print "ERROR: <message>" on stderr and exit with status 1.
# Broken out as special case for log() failure (log() calls this
# directly when it cannot write the log file).  Ordinarily you
# should just use error() to terminate.
_die()
{
	# "$*" joins all arguments into one message word
	printf 'ERROR: %s\n' "$*" >&2
	exit 1
}

# Log an error message, then print it on stderr and exit via _die().
# All arguments together form the message.
error()
{
	log "ERROR: $*"
	# Quoted so the message reaches _die() without being re-split or
	# glob-expanded on the way
	_die "$@"
}

# Log a warning and also print it on stderr; execution continues.
# All arguments together form the message.
warn()
{
	log "WARNING: $*"
	printf 'WARNING: %s\n' "$*" >&2
}

# Append one line (all arguments joined by spaces) to $LOG_FILE.
# Terminates the script via _die() if the log file can't be written.
log()
{
	# Quoted so the message is logged verbatim: no pathname expansion of
	# '*' or '?', and no collapsing of embedded whitespace
	printf '%s\n' "$*" >> "$LOG_FILE" \
		|| _die "Can't append to $LOG_FILE - aborting"
}

# Execute something and log it same as "set -x" would do (so you can see
# stdout and stderr from invoked command, and can also copy the command
# itself from the log file and execute manually later if necessary).
# Somewhat maddeningly (depending on your point of view), pathname
# expansion means that "invoke rm .../*" logs the individual files
# removed, not the "rm *".  This is bad (log is expanded command,
# as opposed to command intended, so you have to be careful manually
# executing these later), but also good in that it provides a cheap
# form of auditing...
#
# Returns the exit status of the invoked command.
invoke()
{
	local quoted
	quoted=$(printf '%q ' "$@")
	# Strip the trailing separator left by the '%q ' format
	printf '+ %s\n' "${quoted% }" >> "$LOG_FILE"
	"$@" >> "$LOG_FILE" 2>&1
}

# Enable service $1 via chkconfig and start it (through its rc$1
# command) unless "rc$1 status" already reports success.
# Terminates via error() if the service fails to start; otherwise
# returns 0.
start_service()
{
	status "Enabling $1 service"

	invoke chkconfig "$1" on
	if ! "rc$1" status >/dev/null; then
		# "cmd || error" rather than testing $? afterwards, so that a
		# successful start doesn't leave a non-zero return status behind
		invoke "rc$1" start || error "Failed to start $1 service"
	fi
}

# Determines if service $1 is chkconfig'd on (not if it's actually
# running).  Succeeds when the chkconfig output contains the service
# name followed by whitespace and "on".
service_is_on()
{
	chkconfig "$1" | grep -E -q "$1[[:space:]]+on"
}

# Print progress message $1 on stdout, indented by two spaces, unless
# BE_QUIET is true.
status()
{
	if $BE_QUIET; then
		return
	fi
	printf '  %s\n' "$1"
}

# Start a progress line for a long-running step: print message $1
# followed by "..." and no newline (unless BE_QUIET is true).  The line
# is normally finished by status_done().
status_long()
{
	if $BE_QUIET; then
		return
	fi
	printf '  %s...' "$1"
}

# Finish a progress line started by status_long(): print "done" and a
# newline (unless BE_QUIET is true).
status_done()
{
	if $BE_QUIET; then
		return
	fi
	printf 'done\n'
}

# Ask the kernel to re-read partition tables (partprobe), then pause
# briefly.  Terminates via error() if partprobe fails.
probe_partitions()
{
	# status_long() appends the "..." itself, so the message carries none
	status_long "Probing for new partitions"
	invoke partprobe || error "Failed to probe new partitions"
	# ...somehow the new partitions don't always appear immediately?  Need to verify/fix
	invoke sleep 5
	status_done
}

# Configure /etc/sysconfig/SuSEfirewall2.d/services/cluster in much the same
# way as yast2-cluster does.  If that file doesn't exist but the firewall is
# on, warn the user that they'll need to configure things manually (the
# assumption here is that yast2-cluster is generally also installed, so that
# file should be present, and should be formatted reasonably sensibly).
#
# Reads COROSYNC_CONF, rewrites SYSCONFIG_FW_CLUSTER and (if it exists and
# doesn't mention "cluster" yet) SYSCONFIG_FW, then restarts the firewall
# if it is active.  Returns 0 without further action when the firewall
# is inactive.
init_firewall()
{
	# ports for csync2, mgmtd, hawk & dlm respectively
	local tcp="30865 5560 7630 21064"

	# all mcastports defined in corosync config, joined by single spaces
	local udp=
	if [ -f "$COROSYNC_CONF" ]; then
		local -a ports=()
		# read -d '' reports failure at end of input even though the
		# array has been filled, so its status is deliberately ignored
		read -r -d '' -a ports < <(awk -F: \
			'/^[[:space:]]*mcastport:/ {print $2}' "$COROSYNC_CONF")
		udp="${ports[*]}"
	fi

	local -i fw_open=0
	if [ -f "$SYSCONFIG_FW_CLUSTER" ]; then
		local tmp_conf=${SYSCONFIG_FW_CLUSTER}.$$
		# Never install a half-written temp file over the real config
		sed \
			-e "s/^TCP=.*/TCP=\"$tcp\"/" \
			-e "s/^UDP=.*/UDP=\"$udp\"/" \
			"$SYSCONFIG_FW_CLUSTER" > "$tmp_conf" \
			|| error "Failed to generate $tmp_conf"
		install_tmp "$tmp_conf" "$SYSCONFIG_FW_CLUSTER"

		# Only touch the main firewall config if it is actually there;
		# otherwise an empty file would be created in its place
		if [ -f "$SYSCONFIG_FW" ] \
			&& ! grep -q '^FW_CONFIGURATIONS_EXT=.*cluster' "$SYSCONFIG_FW"
		then
			tmp_conf=${SYSCONFIG_FW}.$$
			sed \
				-e 's/^\(FW_CONFIGURATIONS_EXT="\)\(.*\)/\1cluster \2/' \
				"$SYSCONFIG_FW" > "$tmp_conf" \
				|| error "Failed to generate $tmp_conf"
			install_tmp "$tmp_conf" "$SYSCONFIG_FW"
		fi
		fw_open=1
	fi

	# No need to do anything else if the firewall is inactive
	rcSuSEfirewall2 status >/dev/null 2>&1 || return 0

	# Firewall is active, either restart or complain if we couldn't tweak it
	if [ $fw_open -eq 1 ]; then
		status "Restarting firewall (TCP $tcp and UDP ${udp:-none} open)"
		invoke rcSuSEfirewall2 restart \
			|| error "Failed to restart firewall"
	else
		warn "Firewall is active - you may need to open ports:"
		warn "	TCP $tcp and UDP ${udp:-none}"
	fi
}

# Block until "crm_mon -1" output mentions "online" (case-insensitive),
# polling every 5 seconds and printing a progress dot per poll unless
# BE_QUIET is true.
wait_for_cluster()
{
	status_long "Waiting for cluster"
	until crm_mon -1 | grep -qi online; do
		if ! $BE_QUIET; then
			echo -n "."
		fi
		sleep 5
	done
	status_done
}

# Bring up the cluster stack on this node: give the hacluster user a
# password if it has none, wipe /var/lib/heartbeat/crm, start hawk and
# openais, then wait for the cluster to come online.
# Terminates via error() unless "rcopenais status" exits with 7, which
# this code treats as "stopped".
init_cluster_local()
{
	# Caller should check this, but I'm paranoid...
	# Capture the status of the call just made; testing an unrelated or
	# unset $rc would make this check meaningless
	local -i rc
	rcopenais status >/dev/null
	rc=$?
	[ $rc -ne 7 ] && error "openais service is not stopped!"

	# reset password, but only if it's not already set
	local ps
	ps=$(passwd -S hacluster 2>/dev/null | cut -d' ' -f2)
	local pass_msg=
	if [ "$ps" != "PS" ]; then
		log ': Resetting password of hacluster user'
		echo "linux" | passwd --stdin hacluster > /dev/null
		pass_msg=", password 'linux'"
	fi

	# evil, but necessary
	invoke rm -f /var/lib/heartbeat/crm/*

	start_service hawk
	status "  HA Web Konsole is now running, to see cluster status go to:"
	if [ -n "$IP_ADDRESS" ]; then
		status "    https://$IP_ADDRESS:7630/"
	else
		status "    https://SERVER:7630/"
	fi
	status "  Log in with username 'hacluster'${pass_msg}"

	if [ -n "$pass_msg" ]; then
		warn "You should change the hacluster password to something more secure!"
	fi

	start_service openais
	wait_for_cluster
}

# Ask the yes/no question $1.  Succeeds if YES_TO_ALL is true or the
# reply is exactly "y" or "Y"; any other reply (including none) fails.
confirm()
{
	if $YES_TO_ALL; then
		return 0
	fi

	local answer
	read -e -p  "  $1 [y/N] " answer
	case "$answer" in
		y|Y)
			return 0
			;;
		*)
			return 1
			;;
	esac
}

# Prompt for a value and print the accepted value on stdout.
#   $1 - prompt message
#   $2 - extended regular expression the value must match
#   $3 - default, used when the user just presses Enter (may be empty)
# An empty value is never accepted interactively.  Returns non-zero if
# input ends before a valid value has been entered.
prompt_for_string()
{
	local msg=$1
	local match=$2
	local default=$3
	local reply
	local candidate
	local at_eof=false

	if $YES_TO_ALL; then
		# Warning: this will return an empty string if
		# there's no default - callers be prepared!
		printf '%s\n' "$default"
		return
	fi

	while :; do
		reply=
		# read fails at end of input; remember that so we don't spin
		# forever re-validating the same value
		read -e -p "  $msg [$default] " reply || at_eof=true
		# The offered default is left untouched by rejected input
		candidate=${reply:-$default}
		if [ -n "$candidate" ] && [[ "$candidate" =~ $match ]]; then
			printf '%s\n' "$candidate"
			return 0
		fi
		echo "    Invalid value entered" >&2
		$at_eof && return 1
	done
}

# Install temp file $1 as $2: record its content in the log, sync, then
# move it into place.  Terminates via error() if the move fails;
# otherwise returns 0.
install_tmp()
{
	log ": created $1 with content:"
	cat "$1" >> "$LOG_FILE"
	invoke sync
	# "cmd || error" rather than testing $? afterwards, so that success
	# doesn't leave a non-zero return status behind
	invoke mv -f "$1" "$2" || error "Failed to install $2 (temp is $1)"
}

# Call with $1 == replace|update, $2 == file
# Records the file's content in the log, loads it with
# "crm configure load", and removes the file afterwards.  Terminates via
# error() if loading fails (the file is left in place in that case).
crm_configure_load()
{
	log ": loading crm config from $2, content is:"
	cat "$2" >> "$LOG_FILE"
	invoke crm configure load "$1" "$2" \
		|| error "Failed to load cluster configuration from $2"
	invoke rm "$2"
}

# Call with $1 == message, $2 == resource
# Polls crm_resource once a second until its output contains
# "running on", printing a progress dot per poll unless BE_QUIET is true.
# There is no timeout.
wait_for_resource()
{
	status_long "$1"
	# Resource name quoted so it is passed through verbatim
	until crm_resource --locate --resource "$2" 2>&1 | grep -q 'running on'; do
		$BE_QUIET || echo -n '.'
		sleep 1
	done
	status_done
}

# Call with $1 == message, $2 == resource
# Polls crm_resource once a second until its output contains
# "NOT running", printing a progress dot per poll unless BE_QUIET is
# true.  There is no timeout.
# TODO: refactor this? (it's almost identical to wait_for_resource)
wait_for_stop()
{
	status_long "$1"
	# Resource name quoted so it is passed through verbatim
	until crm_resource --locate --resource "$2" 2>&1 | grep -q 'NOT running'; do
		$BE_QUIET || echo -n '.'
		sleep 1
	done
	status_done
}

# Print a pseudo-random integer in the range 0-255 (bash $RANDOM
# modulo 256).
random_256()
{
	echo $(( RANDOM % 256 ))
}

# Append the content of file $1 to file $2, recording the operation in
# the log.  Returns the exit status of cat.
# TODO: is there a better way to do this?  I just want to log
# the "cat >>" nicely...
append()
{
	log "+ cat $1 >> $2"
	cat "$1" >> "$2"
}

# Succeeds if the name printed by hostname(1) can be resolved with
# gethostbyname(); returns 1 otherwise.
my_hostname_resolves()
{
	# The name is handed to perl as an argument instead of being pasted
	# into the program text, so no character in it can break the script
	perl -e 'exit(gethostbyname($ARGV[0]) ? 0 : 1)' "$(hostname)"
}

# Call with $1 == hostname, $2 == new line to add to /etc/hosts
# if there's no entry already for hostname.  Make sure you quote
# the new line so it comes in as one arg!
etc_hosts_add_one()
{
	local newhost=$1
	local newline=$2
	# Dots in the name must match literally, not "any character"
	local host_re=${newhost//./\\.}

	# If host doesn't already exist in /etc/hosts, add it
	if ! grep -E -q -s \
		"^[[:digit:]]+.*\\b${host_re}\\b" \
		/etc/hosts
	then
		local tmp_conf=/etc/hosts.$$
		# A missing /etc/hosts is tolerated (hence grep -s above); the
		# new file then consists of just the added line
		if [ -e /etc/hosts ]; then
			cp /etc/hosts "$tmp_conf" \
				|| error "Failed to copy /etc/hosts to $tmp_conf"
		fi
		echo "$newline" >> "$tmp_conf"
		install_tmp "$tmp_conf" /etc/hosts
	else
		log ": Not updating /etc/hosts - remote host $newhost already exists"
	fi
}

# Returns success if there's an entry for the current node
# (regardless of whether it's mapped to localhost or a real IP)
etc_hosts_has_me()
{
	local me
	me=$(hostname)
	# Dots in the name are escaped so they match literally
	grep -E -q "^[[:digit:]]+.*\\b${me//./\\.}\\b" /etc/hosts
}

# Prints the line(s) from /etc/hosts for the current node, leaving out
# any that start with 127 (so if the node only maps to localhost you
# get nothing).
etc_hosts_get_me()
{
	local me
	me=$(hostname)
	# Dots in the name are escaped so they match literally
	grep -E "^[[:digit:]]+.*\\b${me//./\\.}\\b" /etc/hosts | grep -v '^127'
}

# Adds current node to /etc/hosts if it doesn't already exist there,
# mapping its hostname to $IP_ADDRESS.  Terminates via error() if
# IP_ADDRESS is empty.
etc_hosts_add_me()
{
	if etc_hosts_has_me; then
		return
	fi

	status "Adding entry for $(hostname) to /etc/hosts"
	if [ -z "$IP_ADDRESS" ]; then
		error "Unable to determine this node's IP address"
	fi
	etc_hosts_add_one $(hostname) "$IP_ADDRESS $(hostname)"
}

# Trap handler installed by log_start(): writes the closing entry to the
# log and exits.  The exit status is $1 if given, otherwise the status
# of the last command run before the handler was entered.
_onexit()
{
	# Has to come first, before anything overwrites $?
	local exit_code=${1:-$?}
	local stamp
	log '----------------------------------------------------------------'
	stamp=$(date --rfc-3339=seconds)
	log "$stamp exited (rc=$exit_code)"
	log '================================================================'
	# don't double-trap on normal exit
	trap - EXIT
	exit $exit_code
}

# Sanity checks before bootstrapping: warns if this node's hostname
# doesn't resolve or ntp isn't enabled at boot, asks whether to carry on
# if anything was flagged (exiting if the answer is no), then calls
# init_firewall.
check_prereqs()
{
	local -i problems=0

	my_hostname_resolves || {
		warn "Hostname '$(hostname)' is unresolvable - csync2 won't work."
		warn "You should add an entry to /etc/hosts or configure DNS."
		problems=1
	}

	service_is_on ntp || {
		warn "NTP is not configured to start at system boot."
		problems=1
	}

	if (( problems != 0 )); then
		confirm 'Do you want to continue anyway?' || exit
	fi

	init_firewall
}

# Common start-up: open the log (log_start), then detect network
# settings (init_network).  Returns init_network's status.
init()
{
	log_start
	init_network
}

# Write the opening entry (timestamp plus $INVOCATION) to the log and
# install _onexit as the handler for exit and a set of signals.
log_start()
{
	local stamp

	# Convenient side-effect: this will die immediately if the log file
	# is not writable (e.g. if not running as root)
	log '================================================================'
	stamp=$(date --rfc-3339=seconds)
	log "$stamp $INVOCATION"
	log '----------------------------------------------------------------'

	# Now that we know we have at least some chance of running, catch
	# all the interesting signals so we can terminate the log neatly.
	trap _onexit EXIT 1 2 3 10 12 13 14 15
}

# Fill in IP_ADDRESS (primary IPv4 address of $NET_IF) and IP_NETWORK
# (destination of the route whose line mentions that address), each
# only if not already set.  Warns about whatever could not be
# determined.  Always returns 0.
init_network()
{
	# Auto-detection of IP address and netmask only works if $NET_IF is
	# up. If $IP_ADDRESS is overridden, network detection may not work,
	# because "ip route" won't be able to help us.
	if [ -z "$IP_ADDRESS" ]; then
		# Strip the whole "/<prefix length>" suffix; matching only digits
		# 1-9 would leave the "0" of e.g. "/20" stuck to the address
		IP_ADDRESS=$(ip -o -f inet addr show primary \
			| awk -v nif="$NET_IF" \
				'$0 ~ ("[[:space:]]" nif "[[:space:]]") {print $4}' \
			| sed 's/\/[0-9][0-9]*$//')
	fi
	if [ -z "$IP_NETWORK" ] && [ -n "$IP_ADDRESS" ]; then
		# Compare whole fields as plain strings, so that 10.0.0.1 matches
		# neither 10.0.0.10 nor 10x0.0.1
		IP_NETWORK=$(ip route list \
			| awk -v ip="$IP_ADDRESS" \
				'{ for (i = 2; i <= NF; i++) if ($i == ip) { print $1; break } }' \
			| sed 's/\/[0-9][0-9]*$//')
	fi

	if [ -z "$IP_ADDRESS" ]; then
		warn "Could not detect IP address for $NET_IF"
	fi
	if [ -z "$IP_NETWORK" ]; then
		warn "Could not detect network address for $NET_IF"
	fi
	return 0
}
