#!/usr/bin/perl

# padb. a simple parallel debugging aid.

# $URL: https://padb.googlecode.com/svn/branches/3.3/src/padb $
# $Date: 2010-12-08 23:13:41 +0000 (Wed, 08 Dec 2010) $
# $Revision: 426 $

# For help and support visit http://padb.pittman.org.uk
# or email padb-users@pittman.org.uk

# Copyright (C) 2005-2007 Quadrics.
# Copyright (C) 2009-2010 Ashley Pittman.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

# Revision history

# Version 3.3
#  * Add a 'launch-mode' option to controll the back-end used to launch
#    the inner processes, previously padb would use the underlying resource
#    manager if possible and pdsh if not, now this is configurable and
#    padb can be told to use either, the resource manager, pdsh or clush.
#    The default setting for this value is "local,rmgr,ssh,pdsh,clush" which
#    mimics the old behaviour but offers the user more choice about the
#    specifics
#  * Change the default to be not to show paramaters and locals in
#    stack traces, it's very useful to have but adds a lot of noise
#    which makes it harder to spot simple problems.
#  * Add proper support for viewing stack traces in threaded
#    programmes, show a full tree-based stack view for each thread-id
#  * Add limited for LSF jobs.
#  * Better handling of gdb errors, add a catch-all signal handler
#    for sigpipe such that if any undetected error occours we can
#    report it rather than crash.
#  * Detect errors introduced by the "yama" security feature in new
#    kernels, this prevents padb from attaching to running jobs if
#    it's enabled.
#  * Convert between hostnames and fully-qualified-domain-names as
#    required to work on systems which use either or a combination
#    of both.
#  * Sort values properly in proc-summary mode, if a value is numeric
#    for all ranks then do a numberical sort, otherwise perform a
#    dictionary sort.
#  * Modify the PBS resource manager to also work on Torque.  These
#    two resource managers are fundamentally the same but differ
#    slightly in the command line options they accept.
#  * Build fixes for edb (For Quadrics systems)
#  * Fix a problem with reading MPI message queues from Fortran programmes,
#    switch gdb into the 'c' language mode when required rather than just
#    using the default all the time.
#  * Fix orte support for systems where OpenMPI was built with check-point
#    restart enabled.
#
# Version 3.2
#  * Support of PBS Pro
#  * Support for OpenMPI jobs run by mpirun under a slurm allocation.
#  * Modify the Slurm resouce manager code to automatically select a
#    step_id based on what's running on the system currently.
#  * Modify the mpd resource manager code to only call mpdlistjobs on
#    the front-end, it provides all the information we need so record
#    this and send it over the network to the inner processes.
#  * Solaris port.  Limited functionality compared to running on Linux
#    however stack trace mode works fully.
#  * Add variables to tree based stack traces.
#  * Add "mpirun" as a resource manager, this causes it walk the local
#    process list looking for processes called mpirun and to get the
#    pid and hostlist by reading data from Mpir_Proctable as specified
#    in the original paper.  Padb then launches itself via pdsh.
#  * For Open-MPI magically dip inside a number of datatypes to print
#    the name as Open-MPI sees them rather than just the struct
#    contents.  This allows us to report communicators, datatypes and
#    reduction operations by name.
#  * Add a --lstopo option to run the lstopo command for each rank.
#    http://www.open-mpi.org/projects/hwloc/
#  * Add a 'command' mode to run abritary commands on the target node.
#  * Add a 'list-threads' mode to report a comma seperated list of
#    threads for each target process.
#  * Add a 'thread-id' configuration option for when collecting stack
#    traces.  This isn't a complete solution which will have to wait
#    for 3.2 but does allow the user to specify which thread within an
#    application is reported on.
#  * Enhance the integration with gdb, use sequence numbers when
#    talking to gdb and check that we get back what we give it.
#    Correctly notice and raise an appropriate error if gdb dies
#    unexpectedly.
#  * Intercept pointer values for variables and instead of showing the
#    pointer value show a description of what it points to using
#    /proc/pid/maps
#  * Nicely indent variables when showing stack traces.  Indent no
#    more than necessary based on the length of the variable name and
#    the length of the type name.
#  * Fix an error where error strings were being passed through
#    sprintf, we now correctly handle errors when the error strings
#    contain % characters
#  * Overhauled the minfo code and the way it interacts with padb.
#    Add significantly better error handling to this code.
#  * Allow tracing of gdb and minfo interaction to log file.  Debug
#    options so require enabling at the source level.
#  * Add a --create-secret-file option to generate the secret file
#    automatically
#  * Add SVN tags to the source file and the the revision id to the
#    output of output of --version
#  * Make proc-format report data for the first thread in a process
#    rather than a random one.
#  * Add support for the proposed new standard for finding the Message
#    queue plugin in MPI programs.
#  * Have padb handle attaching to programs rather than having the mode
#    callback handle it.  This means that persistent attachments can
#    be used in full-report mode.
#  * Speed up attaching gdb to the target job greatly by attaching to
#    all target processes on a node simultanously rather than one at
#    a time.
#  * Better handling of jobs that dissapear whilst we are monitoring them,
#    there should be no perl errors shown if this happens.
#  * Detect where padb is being run from and specify the full path to the
#    inner processes.  This helps with resource managers which don't
#    preserve $PWD and padb isn't on $PATH
#  * Add proper two-pass argunment handling, secondary options are only
#    accepted if the mode they are relevent to is selected.
#  * Widespread code cleanups to conform with stricter coding standards.
#  * Enable type checking of command line options, all boolean flags can be
#    set yes|no|1|0|true|false now.
#
# Version 3.0
#  * Full-duplex communication between inner and outer processes, padb no
#    longer simply sends request on the command line and processes the
#    response but is truly interactive between the inner and outer
#    processes.  This avails greater flexibility in what can be achieved
#    and hopefully helps with scalability as well.
#  * Enabled warnings (-w) by default.  Fixed lots of warnings, mostly
#    about comparing with undef
#  * Much more complete separation into "modes" of operation, most options
#    are now mode specific rather than simply using global variables or
#    global conf options.
#  * Overhaul of the allfns (mode) callbacks and in particular their
#    parameters
#  * Performance improvements.
#  * Simplify the slurm_find_pids() function to just return the output of
#    scontrol listpids
#  * Take the old process-tree walking code from slurm_find_pids() and make
#    it independent and call it for all resource managers.  This allows
#    scripts which call parallel applications to be bypassed and the
#    applications themselves targeted.
#  * Added "port-range" option to limit port usage in case people try and
#    use padb with firewalls enabled.
#
# Version 2.5
#  * First Non-Quadrics version
#  * Various stability/bug fixes.
#  * Deadlock detect at the MPI Layer rather than the Elan layer if running
#    with a patched MPI (Work in progress)
#  * Completely new build and packing procedure to go with the new
#    maintainer.
#  * Added "orte" to the list of resource managers supported
#  * Don't enable local-qsnet on non-qsnet systems.
#  * inner_main() now uses callbacks for resource manager support.
#  * --signal now takes names rather than numbers.
#  * Check job is valid when using the --full-report option.
#  * Add a --proc-summary option to replace --proc-info --proc-format This
#    gives a very efficient "job top" program.
# Version 2.2
# * Add a --core-stack option along with --core and --exe to extract stack
#   traces from core files.
#
# Version 2.1
#  * Add some magic to return complex perl data structures back from the
#    inner callback functions to the output callback
#    function. (nfreeze/base64_encode).
#  * Add "MPI watch" functionality to allow viewing of MPI state in a
#    vmstat like fashion.
#  * Add a --list-rmgrs option to list active resource managers and their
#    jobs.
#  * Add support for "local-qsnet" as a way of launching jobs.
#  * Add support for "local-fd" as a way of launching jobs.
#  * Add support for "mpd" as a way of launching jobs.
#  * Add support for "lsf-rms" as a way of launching jobs.  Note the
#    lsf/rms integration means this is highly unlikely to work for
#    everyone.
#  * Add a -Olsf-job-offset option for finding lsf jobs in the rms
#  * database.
#  * Support for MPI message queues as well as libelan queues (-Q)
#  * Add a -Ominfo=<exe> option for finding the new minfo.x command.
#  * Add a -Ompi-dll=<dll.so> option for overriding the debugger dll.
#  * Extend the gdb handling code to allow further expansion in the future.
#  * Make the strip-below and strip-above functions configurable.
#  * Add support for loading settings from the environment or a config
#    file.
#  * Add support for "local" as a resource manager to target hand-crafted
#    capabilities.
#  * Ignore case when matching stats names.
#  * Correct printing of debug information from the inner.
#  * Try and remove warnings when run with -w (still disabled)
#  * Un-break it on slurm systems without RMS installed.
#  * Preliminary threading support (courtesy of LLNL)
#  * Show per-rail sdram usage of processes.
#  * Look at all descendant processes of slurmstepd rather than direct
#    descendants and try and avoid scripts (perl/sh/bash/xterm)
#  * Use the new scontrol listpids and %A options to squeue for finding
#    processes on slurm systems (1.2.0 and above).
#  * Don't show usage on command line errors.
#  * Only pass command line options onto the inner if it is going to handle
#  * them
#
# Version 2.0
#  * Removed the -OscriptDir option as it's no longer used, use -Oedb
#    instead.
#  * Corrected the way tally statistics were being added.
#  * Added a --show-jobs option to show possible jobs in a resource manager
#    independent way.
#  * Added a --local-stats option to show message statistics for all
#    processes on the local node.
#  * Added a --proc-format option which allows specific entries from /proc
#    to be reported on a per-vp basis.
#  * Ported to slurm only systems where the RMS kernel module isn't
#    present.
#  * Removed the padb-helper.pl file and folded it's functionality into
#    padb itself. Padb is now self-contained.
#  * Removed the padb.gdb file from the kit, it's generated at run-time if
#    needed.
#  * Various readability fixes and small performance improvements.
#  * Added a --kill option along with --signal which can be used to send
#    signals to any process in the parallel job.
#
# Version 1.6
#  * Add a --proc-info option to show the contents of files from /proc for
#    a given rank
#  * Increase the RMS_EXITTIMEOUT value from 10 to 30 seconds and make it a
#    configuration option.
#
# Version 1.5
#  * Try and load edb from where padb is installed. This should allow it to
#    run on elan3 systems where the qsnetdefault link is set to elan3.
#  * GNAT:8110 Proper use of English in the group deadlock detection
#    report.
#  * Target the correct process if there are multiple processes for each
#    vp.  Use the pid of the process which called elan_baseInit()
#  * GNAT 7945: Fix messages on stderr about integer overflow on 32 bit
#    machines
#  * Remove warnings when -w is turned on.
#  * Re-work the stack trace tree generation code do work via a
#    intermediate data structure to make the code easier to parse.
#  * Report errors loading stats from a running job cleanly.
#  * Better backwards compatibility with older RMS releases.
#  * Add a padb-treeview script to the release, this takes most of it's
#    code from padb and uses tk to provide the user with a X based view of
#    the stack traces.
#  * Changes to edb so the stats loading code can run on elan3 systems.
#
# Version 1.4
#  * Bumped version number to 1.4
#  * Change the format of tree based stack traces, it now uses a more
#    logical indention style.
#  * Discover and report if application stats are incomplete.
#  * Allow the use of -r with -s to view statistics from an individual
#    process.  Update -S (which does the same thing) to parse the stats in
#    padb as well.
#  * Improved error handling in the case where jobs complete whilst padb is
#    running, sample the job state before and after going parallel and do
#    the right thing accordingly.
#  * Much improved error output, only report an error if something bad
#    happened.
#  * Changes to the code as required to enable padb to run cleanly with
#    warnings (-w) enabled.
#  * Added a -Ostats-name= option to allow the extraction of one specific
#    statistic from the command line.
#  * Create separate file descriptors for stdout and stderr when running in
#    parallel to make it more resilient.
#
# Version 1.3
#  * Strip stack traces below main when possible, add a
#    --nostrip-below-main option to turn this off.
#  * Strip stack traces above elan_waitWord when possible, add a
#    --nostrip-above-wait option to turn this off.
#  * Added a -Ogdb-retry-count=N option. Defaults to three as before but is
#    now tunable.
#  * Parse communication statistics in padb directly now rather then
#    relying on edb to do it for us
#  * Allow reading of stats from file (-s -i )
#  * Perform group deadlock detection in padb directly rather than in edb,
#    improved the output and handling of corner cases.
#  * Initial version of a "one process per line" method of statistics
#    reporting.
#  * Better catching and reporting of errors when running parallel
#    programs.
#  * Bumped the version number to 1.3
#
# Version 1.2
#  * Converted padb to use long command line options. The short ones still
#    work but now have long alternatives
#  * Removed the need to set -OscriptDir= when running in non-default
#    locations
#  * Added a --full-report=<jobId> option to gather all possible
#    information
#  * General tidy ups in the stack trace tree generation code.
#  * Now reports processes that aren't present when generating stack
#    traces.
#  * Now reports errors properly when there are errors launching the
#    parallel job
#  * Calls edb directly rather than using a helper script when possible
#    (statistics reports).
#  * Incremented version number from 1.0 to 1.2.
#
# TODO:
#
# * More testing with -w turned on.
# * Multi-pass argument handling, --kill also accepts --signal for example,
#   this should really be done at the getopt layer.  Also proper usage info
#   for these secondary args.
# * Parameter checking of secondary args, signal has a hacky implementation
#   and port-range doesn't have any checking currently.
# * libunwind support?  lighter weight than gdb and possibly more reliable.
# * Maybe PMI would help?
# * POD? generated man page?
# * mode specific defaults, for example --mpi-watch should enable --watch
#   -Owatch-clears-screen=0
# * Make -q fallback to -Q if tports are not available
# * ???
# * Allow ranges of ranks to be specified.

###############################################################################

use warnings;
use strict;
use Getopt::Long;
use File::Basename;
use IPC::Open3;
use Cwd;
use Data::Dumper;
use Storable qw(dclone nfreeze thaw);
use Sys::Hostname;
use File::Temp qw(tempfile);
use MIME::Base64;
use Config;
use IO::Socket;
use IO::Select;
use Carp;

###############################################################################
#
# Header.
#
###############################################################################

# Formatted with the command 'perltidy -b -ce -w padb' to maintain a
# vaguely readable form.

# This (large) source file contains a number of loosely separated segments,
# namely...

# Header.
# Resource manager setup
# Config options and defaults
# Usage and version
# Globals
# Elan statistics.
# Group deadlock detection
# Local (per node) stats.
# Stack trace tree compression.
# RMS support.
# Slurm support.
# Resource manager support.
# Output formatting
# Data collection (parallel and from file).
# Outer main
# Inner
# Main.

my $svn_revision_string = '$Revision: 426 $';
my $svn_revision        = 'unknown';

if ( $svn_revision_string =~ m{(\d+)} ) {
    $svn_revision = $1;
}

my $prog    = basename $0;
my $version = "3.3 (Revision $svn_revision)";

my %conf;

my $secret;

# Solaris support!  Tested for and works for stack traces, anything else
# should be considered a bonus at this stage.  Reports are welcome, more so
# if they contain good news.
my $running_on_solaris = 0;

# Test for solaris by checking for a file rather than running uname, it'll
# be quicker.  Note that with this option enabled padb will still function
# correctly on Linux.
if ( -d '/proc/1/path' ) {
    $running_on_solaris = 1;
}

# Config options the inner knows about, only forward options if they are in
# this list.
my @inner_conf =
  qw(edb edbopt rmgr scripts slurm_job_step pbs_server lsf_mode lsfmpi_server lsfmpi_mpirpid lsfmpi_port);

# More options the inner knows about, these are forwarded on the
# command line rather than over the sockets.
my @inner_conf_cmd = qw(port_range outer interval);

###############################################################################
#
# Resource manager setup
#
###############################################################################

# A hash of supported resource managers, each of which provides a number of
# functions for querying the state of the machine.  This keeps the core of
# the code tidy.  Note that this is only for the "outer" instance of the
# program, the inner version does things differently.

# Function        Args Returns   Required Description
# is_installed    -    Bool      yes      Check for being installed and running.
# get_active_jobs user List      yes      Return list of all active job for user.
# is_job_running  job  Bool      no       Check if a given job is running.
# job_to_key      job  key       no       Convert from jobId to shm key.
# find_pids       job  -         maybe    Called on the inner to locate pids.

# In addition one of these two is preferred setup_job has more
# flexibility however setup_pcmd is good enough for most cases.  See
# the setup_jobfunction for full description.
# setup_pcmd      job  cmd|ncpus yes      Command needed to launch shadow jobs.
# setup_job       job            no

# inner_rmgr      var  n/a       no       Resource manager to masquerade as.
# require_inner_callback var n/a no       Resource manager doesn't preserve line
#                                         ordering of stdout.

# Current a single resource manager is assumed which is used for (a)
# discovering jobs (b) launching the shadow job and (c) finding the target
# processes from the inner padb processes.  Two caveats to this exist, the
# "inner_rmgr" setting which allows a resource manager which has specifed
# (a) and (b) to pass the buck onto a different resource manager for (c).
# This is typically used for schedulers or software layers which sit on top
# of the resource manager.  Care need to be taken in this case to convert
# the jobid when switching from outer to inner (only lsf-rms does this
# currently and I'm not 100% sure that still works).  Also the setup_job()
# callback allows resource managers which provide (a) to not provide (b)
# but to rely on padb to launch a shadow job on the host-list it provides.
# Padb uses pdsh for this.
#
# What would be possible however is to split (b) off completely, many
# resource managers launch the shadow job simply by taking a hostlist so it
# would be possible to mix-and-match (a) and (b) from different resource
# managers, perhaps use mpd to query the job, return a host list and then
# use orte to launch the actual job.
#
# For resource managers which don't provide (b) (currently mpirun only but
# expected to grow) padb uses pdsh which is limited in the size of job that
# it can run, one solution to this might be to require say a open-mpi
# install and use orterun to launch the shadow job.  This could have
# benefits elsewhere as well, both the speed of (b) and it's ability to
# interact with padb (for port number forwarding) are crucial for
# scalability, having a single stack for padb to sit on would allow
# concentration of tuning effort in a single place which is something everyone
# could benefit from.

my %rmgr;

$rmgr{mpirun} = {
    get_active_jobs => \&mpirun_get_jobs,
    job_is_running  => \&local_job_is_running,
    setup_job       => \&mpirun_setup_job,
};

$rmgr{rms} = {
    is_installed    => \&rms_is_installed,
    get_active_jobs => \&rms_get_jobs,
    job_is_running  => \&rms_job_is_running,
    job_to_key      => \&rms_job_to_key,
    setup_pcmd      => \&rms_setup_pcmd,
    find_pids       => \&rms_find_pids,
};

$rmgr{mpd} = {
    is_installed           => \&mpd_is_installed,
    get_active_jobs        => \&mpd_get_jobs,
    setup_job              => \&mpd_setup_job,
    require_inner_callback => 1,
};

$rmgr{orte} = {
    is_installed    => \&open_is_installed,
    get_active_jobs => \&open_get_jobs,
    setup_job       => \&open_setup_job,
};

$rmgr{'lsf-rms'} = {
    is_installed    => \&lsf_is_installed,
    get_active_jobs => \&lsf_get_jobs,
    setup_pcmd      => \&lsf_setup_pcmd,
    inner_rmgr      => 'rms',
};

$rmgr{slurm} = {
    is_installed           => \&slurm_is_installed,
    get_active_jobs        => \&slurm_get_jobs,
    job_is_running         => \&slurm_job_is_running,
    setup_job              => \&slurm_setup_job,
    find_pids              => \&slurm_find_pids,
    require_inner_callback => 1,
};

$rmgr{local} = {
    get_active_jobs => \&local_get_jobs,
    job_is_running  => \&local_job_is_running,
    setup_job       => \&local_setup_job,
};

$rmgr{'local-qsnet'} = {
    is_installed    => \&local_q_is_installed,
    get_active_jobs => \&local_q_get_jobs,
    job_is_running  => \&local_job_is_running,
    setup_job       => \&local_setup_job,
    inner_rmgr      => 'local',
};

$rmgr{'local-fd'} = {
    get_active_jobs => \&local_fd_get_jobs,
    job_is_running  => \&local_job_is_running,
    setup_job       => \&local_setup_job,
    inner_rmgr      => 'local',
};

$rmgr{pbs} = {
    is_installed    => \&pbs_is_installed,
    get_active_jobs => \&pbs_get_jobs,
    setup_job       => \&pbs_setup_job,
    find_pids       => \&pbs_find_pids,
};

$rmgr{lsf} = {
    is_installed    => \&lsfmpi_is_installed,
    get_active_jobs => \&lsfmpi_get_jobs,
    setup_job       => \&lsfmpi_setup_pcmd,
    find_pids       => \&lsfmpi_find_pids,
};

###############################################################################
#
# Config options
#
###############################################################################

# If changing any of these defaults also check the inner code as some of
# these settings affect that program as well and padb will only pass on
# settings on the command line, not the entire config hash.  The reason
# they are listed here as well is so that padb -O help works and gives the
# correct defaults.

my %allfns;

my %cinner;        # Config options to be passed to inner.
my %cinner_cmd;    # Config options to be passed to inner.

my $rem_jobid;

# Debug options.
$conf{verbose} = 0;

# Valid values are "none" "missing" or "all".  Anything not recognised is
# treated as "all".
$conf{check_signon} = 'all';

# Output options.
$conf{interval}            = '10s';
$conf{watch_clears_screen} = 'enabled';
$conf{scripts}             = 'bash,sh,dash,ash,perl,xterm';
$conf{mpirun}              = 'mpirun,orterun,srun,mpdrun,prun,mpiexec';
$conf{lsf_job_offset}      = 1;
$conf{local_fd_name}       = '/dev/null';
$conf{inner_callback}      = 'disabled';

# These two are used by deadlock and QsNet group code, they need migrating
# in the group code when I have access to a test system again.
#$conf{"show-group-members"}  = 0;
#$conf{"show-all-groups"}     = 0;

# Tuning options.
$conf{prun_timeout}     = '2m';
$conf{prun_exittimeout} = '2m';
$conf{rmgr}             = undef;

$conf{slurm_job_step} = undef;
$conf{orte_job_step}  = undef;

$conf{pbs_server} = undef;

# These settings are passed onto inner only.
$conf{edbopt} = undef;

$conf{edb} = find_edb();

# Option to define a list of ports used by padb.
$conf{port_range} = undef;

$conf{tree_width} = '4';

$conf{launch_mode} = 'local,rmgr,ssh,pdsh,clush';

# Config options which take boolean values.
my @conf_bool = qw(watch_clears_screen inner_callback);

# Config options which take a time value.
my @conf_time = qw(prun_exittimeout prun_timeout interval);

# Config options which take an integer.
my @conf_int = qw(lsf_job_offset slurm_job_step orte_job_step tree_width);

my $norc       = 0;
my $configfile = '/etc/padb.conf';

# Standard regexpes for splitting on comma, equals and spaces.  Note the
# space regexp matches multiple whitespace characters.
my $COMMA  = qr{,}x;
my $EQUALS = qr{=}x;
my $SPACE  = qr{\s+}x;
my $COLON  = qr{:}x;
my $PERIOD = qr{\.}x;

my $EMPTY_STRING = q{};

# Enable sorting in Data::Dumper for clarity.
$Data::Dumper::Sortkeys  = 1;
$Data::Dumper::Quotekeys = 0;

sub check_and_convert_bool {
    my ($str)    = @_;
    my @enabled  = qw(1 yes on true enabled);
    my @disabled = qw(0 no off false disabled);
    my %bool_table;
    map { $bool_table{$_} = 1 } @enabled;
    map { $bool_table{$_} = 0 } @disabled;

    if ( defined $bool_table{$str} ) {
        return $bool_table{$str};
    }
    printf {*STDERR} "Boolean value \"%s\" not recognised, aborting.\n", $str;
    exit 1;
}

sub check_and_convert_time {
    my ($str) = @_;
    if (
        $str =~ m{\A      # Start of line
                  (\d+)   # A number
                  (s|m)?  # With an option s or m suffix.
                  \z}x
      )
    {
        if ( defined $2 and $2 eq 'm' ) {
            return $1 * 60;
        }
        return $1;
    }
    printf {*STDERR} "Time value \"%s\" not recognised, aborting.\n", $str;
    exit 1;
}

sub is_digit {
    my ($str) = @_;

    return $str =~ m{\A     # Start of line
		   \d+    # A number
		   \z}x
}

sub check_int {
    my ($str) = @_;

    return if ( is_digit($str) );

    printf {*STDERR} "Integer value \"%s\" not recognised, aborting.\n", $str;
    exit 1;
}

sub check_signal {
    my ($signal) = @_;

    my $s = uc $signal;
    my %sig_names;
    foreach ( split $SPACE, $Config{sig_name} ) {
        $sig_names{$_} = 1;
    }

    if ( not defined $sig_names{$s} ) {
        printf {*STDERR} "Error: signal \"%s\" is invalid, aborting.\n",
          $signal;
        exit 1;
    }
    return $s;
}

# Look for edb in the default install location only.
sub find_edb {
    return '/usr/lib/qsnet/elan4/bin/'
      if ( -d '/usr/lib/qsnet/elan4/bin/' );
    return '/usr/lib64/qsnet/elan4/bin/'
      if ( -d '/usr/lib64/qsnet/elan4/bin/' );
    return 'edb';
}

# Look for minfo in the filesystem.  If it appears that padb has been
# installed then look for minfo in the directory where it would have been
# installed to.  If that's not the case or it's not there then look in the
# same directory as padb is running from.
sub find_minfo {
    my $self = $0;
    if ( $self =~ m{\A(.+)/bin/padb\z} ) {
        my $dir = $1;
        if ( -f "$dir/libexec/minfo" ) {
            return "$dir/libexec/minfo";
        }
    }
    my $dir = dirname($self);
    return "$dir/minfo";
}

###############################################################################
#
# usage and version.
#
###############################################################################

sub show_version {
    print "$prog version $version\n\n";
    print "Written by Ashley Pittman\n";
    print "http://padb.pittman.org.uk\n";
    exit 0;
}

my $usage = <<'EOF';
Usage: padb [-hv] [-c|-C|-t] -g|-q|-s|-x|-X [-O <opt>=<val>
            [,<opt>=<val>...]] [-i <file>] [-r <rank>] [-u <user>]
            -a|-A|<jobid ...>

-a --all               report on all running jobs for user.
-A --any               report on a running job for user.
-u --user=<USER>       report on jobs for username=<user>.

-r --rank=<RANK>       report only on processes <RANK>.
   --group-id=<ID>     report only on group <ID>.

XXXX
   --full-report=<JOBID> Generate a full report of job state.

   --no-strip-below-main Don't strip stack traces below main.
   --no-strip-above-wait Don't strip stack traces about elan_waitWord.

   --proc-format       Specify information to show about processes.

-c --compress          Use dshbac -c format.
-C --compress-long     Use other dshbak format.
-t --tree              Use tree based output for stack traces.
-i --input-file=FILE   Read input from file.

   --watch

   --debug=<mode>,<mode1>  Enable debug for mode, use mode=all for all debugging.
   --debug-file=file   Log debug information to file.

-O [opt1=val,<opt2=val>] Set internal config options for padb, advanced use only.
  Options in this version (these are liable to change)
  Use -Ohelp for showing current settings

  General options:
  verbose              Set verbosity level.
  edb                  Full path to edb

  Slurm only options
  slurm-job-step       Job step to target.

  RMS only options
  prun-timeout         Timeout to use when launching parallel job.

  Stack trace options:
  gdb-retry-count      Number of times to try getting a 'good' stack trace from gdb.
  stack-shows-params    Show function parameters in stack traces.
  stack-shows-locals    Show locals in stack traces.

  Statistics options:
  stats-short          Turn on "one process per line" stats reporting code.
  stats-sort-key       Sort stats by <key>.
  stats-reverse        Reverse order when showing stats.
  stats-name           Only report the value of a single stat.

  Group deadlock detection options:
  show-group-members   Show group to vp translations in the group code.
  show-all-groups      Report on all groups in a job.

  Watch options:
  interval             Refresh rate.

-v --verbose           Verbose.
-V --version           Show version number and exit.
-h --help              print this usage message.

EOF

sub usage {
    chomp $usage;

    my $extra = $EMPTY_STRING;
    $extra .= "Modes of operation\n";
    foreach my $arg ( sort keys %allfns ) {
        next unless ( defined $allfns{$arg}{help} );
        next if ( defined $allfns{$arg}{qsnet} );
        if ( defined $allfns{$arg}{arg_short} ) {
            $extra .= "-$allfns{$arg}{arg_short}";
        } else {
            $extra .= '  ';
        }
        $extra .= sprintf " --%-18s%s.\n",
          $allfns{$arg}{arg_long},
          $allfns{$arg}{help};
    }

    $extra .= "\nQsNet specific modes\n";
    foreach my $arg ( sort keys %allfns ) {
        next unless ( defined $allfns{$arg}{help} );
        next unless ( defined $allfns{$arg}{qsnet} );
        if ( defined $allfns{$arg}{arg_short} ) {
            $extra .= "-$allfns{$arg}{arg_short}";
        } else {
            $extra .= '  ';
        }
        $extra .= sprintf " --%-18s%s.\n",
          $allfns{$arg}{arg_long},
          $allfns{$arg}{help};
    }

    $usage =~ s{XXXX}
               {$extra}xms;

    print {*STDERR} $usage;
    exit 1;
}

###############################################################################
#
# Globals.
#
###############################################################################

my $target_user = getpwuid $<;
my $rank_rng;

my @target_groups;
my $all;
my $any;

# Number of functions provided on the command line from the allfns hash.
my $have_allfns_option = 0;

my $full_report;
my $core_stack;
my $list_rmgrs;
my $create_secret;
my $watch;
my $local_stats;
my $show_jobs;

my $core_name;
my $exe_name;

my $input_file;
my $output_compress;
my $output_compress_long;
my $output_tree;

my %config_options;

my %ic_names;
my %ic_names_cmd;

# Debugging: this function is called periodically with a mode, an arbitrary
# ref and a string, it can either print simply the string or call dumper on
# the ref as well.  Enable with --debug=type1,type2=all
my %debug_modes;
my $start_time = time;
my $debug_fd;

sub set_debug_file {
    my ($filename) = @_;

    if ( defined $filename ) {
        if ( not open $debug_fd, '>', $filename ) {
            print "Unable to open log file for writing: $!\n";
            $debug_fd = *STDOUT;
        }
    } else {
        $debug_fd = *STDOUT;
    }

    return;
}

sub debug_log {
    my ( $type, $handle, $str, @params ) = @_;
    if ( not exists $debug_modes{$type} ) {
        print "Unknown debug mode: $type\n";
        exit 1;
    }
    return unless $debug_modes{$type};
    my $time = time - $start_time;
    printf {$debug_fd} "DEBUG ($type): %3d: $str\n", $time, @params;
    return if $debug_modes{$type} eq 'basic';
    return unless defined $handle;
    print {$debug_fd} Data::Dumper->Dump( [$handle], [$type] );
    return;
}

# Valid debug modes, a full list is maintained here so using unexpected
# ones can generate warnings.
$debug_modes{full_duplex} = undef;
$debug_modes{show_cmd}    = undef;
$debug_modes{all}         = undef;
$debug_modes{tree}        = undef;
$debug_modes{verbose}     = undef;
$debug_modes{signon}      = undef;
$debug_modes{rmgr}        = undef;
$debug_modes{ctree}       = undef;
$debug_modes{tdata}       = undef;
$debug_modes{config}      = undef;
$debug_modes{pcmd}        = undef;

sub slurp_file {
    my ($file) = @_;
    open my $FD, '<', $file or return;
    my @contents = <$FD>;
    close $FD;
    return @contents;
}

sub slurp_cmd {
    my ($cmd) = @_;
    open my $CFD, '-|', "$cmd 2>/dev/null" or return;
    my @out = <$CFD>;
    close $CFD;
    return @out;
}

sub slurp_remote_cmd {
    my ( $host, $cmd ) = @_;
    return slurp_cmd("ssh $host $cmd");
}

sub slurp_dir {
    my ($dir) = @_;
    opendir my $DIR, $dir or return;
    my @files = readdir $DIR;
    closedir $DIR;
    return @files;
}

# Return an array of current processes for a given user.
sub get_process_list {
    my ($user) = @_;
    if ($running_on_solaris) {
        my @procs = slurp_cmd("ps -o pid= -u $user");
        chomp @procs;
        my @clean_procs;
        foreach my $proc (@procs) {
            $proc =~ s{\s*}{}g;
            push @clean_procs, $proc;
        }
        return @clean_procs;
    }
    my $uid = getpwnam $user;
    return unless defined $uid;
    my @pids = slurp_dir('/proc');
    my @userpids;
    foreach my $pid (@pids) {
        next unless ( $pid =~ m{\A\d+\z}xms );
        my ( undef, undef, undef, undef, $owner ) = stat "/proc/$pid";

        # Check the stat worked, it's possible for processes to disappear
        # Take care to check for defined rather than true as root has a uid
        # of zero.
        next unless defined $owner;
        next unless $owner == $uid;
        push @userpids, $pid;
    }
    return @userpids;
}

# Return the process list for a given user, return a hash indexed by pid
# and containing the parent pid for the given process.
sub get_extended_process_list {
    my ($user) = @_;
    if ($running_on_solaris) {
        my @procs = slurp_cmd("ps -o pid= -o ppid= -u $user");
        my %procs;
        foreach my $proc (@procs) {

            # A little bit of magic here, ps sometimes left pads its
            # output with a space which the regexp matches.  To avoid
            # problems strip leadinging whitespace before we split so
            # that $pid is the first entry returned from the split
            # command.
            $proc =~ s{\A$SPACE}
	              {}x;
            my ( $pid, $ppid ) = split $SPACE, $proc;
            $procs{$pid} = $ppid;
        }
        return %procs;
    }
    my $uid = getpwnam $user;
    return unless defined $uid;
    my @pids = slurp_dir('/proc');
    my @userpids;
    my %procs;
    foreach my $pid (@pids) {
        next unless ( $pid =~ m{\A\d+\z}xms );
        my ( undef, undef, undef, undef, $owner ) = stat "/proc/$pid";

        # Check the stat worked, it's possible for processes to disappear
        # Take care to check for defined rather than true as root has a uid
        # of zero.
        next unless defined $owner;
        next unless $owner == $uid;
        my $ppid = find_from_status( $pid, 'PPid' );
        if ( defined $ppid ) {
            $procs{$pid} = $ppid;
        }
    }
    return %procs;
}

sub parse_args_outer {

    Getopt::Long::Configure( 'bundling', 'pass_through' );
    my $debugflag;
    my $debugfile;

    my @ranks;

    my %optionhash = (
        'verbose|v+'          => \$conf{verbose},
        'user|u=s'            => \$target_user,
        'rank|r=s'            => \@ranks,
        'group-id=s'          => \@target_groups,
        'help|h'              => \&usage,
        'all|a'               => \$all,
        'any|A'               => \$any,
        'version|V'           => \&show_version,
        'compress|c'          => \$output_compress,
        'compress-long|C'     => \$output_compress_long,
        'tree|t'              => \$output_tree,
        'input-file|file|i=s' => \$input_file,
        'full-report=s'       => \$full_report,
        'core-stack'          => \$core_stack,
        'core=s'              => \$core_name,
        'exe=s'               => \$exe_name,
        'list-rmgrs'          => \$list_rmgrs,
        'watch'               => \$watch,
        'local-stats'         => \$local_stats,
        'show-jobs'           => \$show_jobs,
        'norc'                => \$norc,
        'config-file=s'       => \$configfile,
        'debug=s'             => \$debugflag,
        'debug-file=s'        => \$debugfile,
        'create-secret-file'  => \$create_secret,
    );

    # The primary modes, one of these only must be set.
    my %config_hash;

    foreach my $arg ( keys %allfns ) {

        # Set the primary mode in the hash.
        $optionhash{ to_arg( $allfns{$arg} ) } = \$config_hash{$arg};
    }

    # Parse the options once to pick up the mode and any single letter
    # options which might be bundled with it.
    GetOptions(%optionhash);

    set_debug_file($debugfile);

    Getopt::Long::Configure( 'default', 'bundling' );

    my $mode;

    foreach my $arg ( keys %config_hash ) {
        next unless defined $config_hash{$arg};
        $mode = $arg;
        $have_allfns_option++;
    }

    # The secondary args, specify all of them for now as we only call
    # GetOptions once.
    my %sec_args;

    # Set any extra options this mode may or may not accept.
    if ( defined $mode and defined $allfns{$mode}{secondary} ) {
        foreach my $sec ( @{ $allfns{$mode}{secondary} } ) {
            $optionhash{ to_arg($sec) } = \$sec_args{ $sec->{arg_long} };
        }
    }

    # Set this for the second iteration only so that GetOptions can abort
    # correctly if they are called without a value.
    $optionhash{'config-option|O=s'} = \%config_options;

    GetOptions(%optionhash) or exit 1;

    if ( defined $debugflag ) {
        foreach my $f ( split $COMMA, $debugflag ) {
            my ( $name, $v ) = split $EQUALS, $f;
            if ( exists $debug_modes{$name} ) {
                $debug_modes{$name} = defined $v ? $v : 'basic';
            } else {
                print "Attempt to set unknown debug flag \"$name\".\n";
            }
        }
        if ( $debug_modes{all} ) {
            foreach my $mode ( keys %debug_modes ) {
                if ( not defined $debug_modes{$mode} ) {
                    $debug_modes{$mode} = $debug_modes{all};
                }
            }
        }
    }

    if (@ranks) {
        $rank_rng = rng_convert_from_user( shift @ranks );

        foreach my $rank (@ranks) {
            rng_merge( $rank_rng, rng_convert_from_user($rank) );
        }
    }

    foreach my $arg ( keys %allfns ) {
        if ( defined $allfns{$arg}{options_i} ) {
            foreach my $o ( keys %{ $allfns{$arg}{options_i} } ) {
                $conf{mode_options}{$arg}{$o} = $allfns{$arg}{options_i}{$o};
                $conf{mode_options_reverse}{$o}{$arg} = 1;
            }
        }

        if ( defined $allfns{$arg}{options_bool} ) {
            foreach my $o ( keys %{ $allfns{$arg}{options_bool} } ) {
                $conf{mode_options}{$arg}{$o} = $allfns{$arg}{options_bool}{$o};
                $conf{mode_options_reverse}{$o}{$arg} = 1;
                $conf{options_verify}{$arg}{$o} = \&check_and_convert_bool;
            }
        }

        if ( defined $allfns{$arg}{secondary} ) {
            foreach my $sec ( @{ $allfns{$arg}{secondary} } ) {

                # If this is set then take the value it was set to.
                if ( defined $sec_args{ $sec->{arg_long} } ) {
                    $conf{mode_options}{$arg}{ $sec->{arg_long} } =
                      $sec_args{ $sec->{arg_long} };
                } else {

                    # Else set it to the default for this mode.
                    $conf{mode_options}{$arg}{ $sec->{arg_long} } =
                      $sec->{default};
                }

                if ( defined $sec->{verify} ) {
                    $conf{options_verify}{$arg}{ $sec->{arg_long} } =
                      $sec->{verify};
                }

                $conf{mode_options_reverse}{ $sec->{arg_long} }{$arg} = 1;
            }
        }
    }

    return $mode;
}

###############################################################################
#
# Elan statistics.
#
###############################################################################

# Work around problems with the "hex" function and whilst we are at it
# avoid warnings as well.  Unfortunately hex can't deal with anything
# bigger than 2^31 without giving an error so simply +0 on the string to
# convert it to a int cleanly (GNAT 7945).
sub _hex {
    my $str = shift;
    if ( not defined $str ) {
        return 0;
    } elsif ( $str eq '0xffffffffffffffff' ) {
        return -1;
    } else {

        if ( length $str < 9 ) {
            return hex $str;
        }

        # It was hard to write, it's supposed to be hard to read.

        $str =~ s/\A0x//;
        my $lower = hex( "0x" . substr( "0" x 8 . $str, -8 ) );
        my $upper = hex( "0x" . substr( "0" x 16 . $str, -16, 8 ) );
        $lower += ( 0x10000000 * 0x10 * $upper );

        return $lower;
    }
}

sub estats_sum_attr {
    my ( $current, $sum_so_far ) = @_;

    if ( defined $sum_so_far->{raw}[0]
        and $sum_so_far->{raw}[0] != $current->{raw}[0] )
    {
        $sum_so_far->{raw}[0] = undef;
    }

    return $sum_so_far;
}

sub estats_sum_bin {
    my ( $current, $sum_so_far ) = @_;

    for ( my $j = 0 ; $j < 32 ; $j++ ) {
        $sum_so_far->{raw}[$j] += $current->{raw}[$j];
    }

    #check min
    if (
        ( $sum_so_far->{raw}[32] == -1 )
        or (    ( $current->{raw}[32] != -1 )
            and ( $current->{raw}[32] < $sum_so_far->{raw}[32] ) )
      )
    {
        $sum_so_far->{raw}[32] = $current->{raw}[32];
    }

    #check max
    if ( $current->{raw}[33] > $sum_so_far->{raw}[33] ) {
        $sum_so_far->{raw}[33] = $current->{raw}[33];
    }

    #total
    $sum_so_far->{raw}[34] += $current->{raw}[34];

    return $sum_so_far;
}

sub estats_sum_counter {
    my ( $current, $sum_so_far ) = @_;

    $sum_so_far->{raw}[0] += $current->{raw}[0];
    return $sum_so_far;
}

sub estats_sum_tally {
    my ( $current, $sum_so_far ) = @_;

    for ( my $j = 0 ; $j < 3 ; $j++ ) {
        $sum_so_far->{raw}[$j] += $current->{raw}[$j];
    }

    return $sum_so_far;
}

my @scales = (
    'Bytes',     'Kilobytes', 'Megabytes', 'Gigabytes',
    'Terabytes', 'Petabytes', 'Exabytes'
);

my @bin_names = (
    '0 bytes',   '1 byte',    '2 bytes',   '4 bytes',
    '8 bytes',   '16 bytes',  '32 bytes',  '64 bytes',
    '128 bytes', '256 bytes', '512 bytes', '1kb',
    '2kb',       '4kb',       '8kb',       '16kb',
    '32kb',      '64kb',      '128kb',     '256kb',
    '512kb',     '1mb',       '2mb',       '4mb',
    '8mb',       '16mb',      '32mb',      '64mb',
    '128mb',     '256mb',     '512mb',     'overflow'
);

sub estats_show_counter {
    my ($d) = @_;

    my $ret = $EMPTY_STRING;
    my $toshow;
    foreach my $counter ( sort keys %{$d} ) {

        if ( $d->{$counter}{raw}[0] != 0 or $conf{show_all_stats} ) {
            if ( defined $toshow ) {
                $ret .=
"  Counter: '$d->{$toshow}{name}' = '$d->{$toshow}{raw}[0]','$d->{$counter}{name}' = '$d->{$counter}{raw}[0]'\n";
                undef $toshow;
            } else {
                $toshow = $counter;
            }
        }
    }

    if ( defined $toshow ) {
        $ret .= "  Counter: '$d->{$toshow}{name}' = '$d->{$toshow}{raw}[0]'\n";
    }

    return $ret;
}

sub estats_show_attr {

    # Hopefully have an array at this point.
    my ($d) = @_;

    my $ret = $EMPTY_STRING;
    my $toshow;
    foreach my $attr ( sort keys %{$d} ) {
        next unless defined $d->{$attr}{raw}[0];
        if ( defined $toshow ) {
            $ret .=
"  Attribute: '$d->{$toshow}{name}' = '$d->{$toshow}{raw}[0]', '$d->{$attr}{name}' = '$d->{$attr}{raw}[0]'\n";
            undef $toshow;
        } else {
            $toshow = $attr;
        }

    }
    if ( defined $toshow ) {
        $ret .=
          "  Attribute: '$d->{$toshow}{name}' = '$d->{$toshow}{raw}[0]'\n";

    }
    return $ret;
}

sub estats_show_tally {
    my ($d) = @_;

    my $ret = $EMPTY_STRING;
    foreach my $tally ( sort keys %{$d} ) {
        if ( $d->{$tally}{raw}[0] or $conf{show_all_stats} ) {
            $ret .= sprintf
              "%16s: Total: %d Active: %d HWM: %d\n",
              $d->{$tally}{name}, $d->{$tally}{raw}[0],
              $d->{$tally}{raw}[1], $d->{$tally}{raw}[2];
        }
    }
    return $ret;
}

sub estats_show_bin {
    my ($d) = @_;

    my $ret = $EMPTY_STRING;
    foreach my $bin ( sort keys %{$d} ) {

        if ( ( $d->{$bin}{raw}[0] || $d->{$bin}{raw}[34] )
            or $conf{show_all_stats} )
        {
            my $total = $d->{$bin}{raw}[34];
            my $scale = 0;

            while ( $total > 1024 ) {
                $total /= 1024;
                $scale++;
            }

            $ret .= sprintf
"%16s: min $d->{$bin}{raw}[32] max $d->{$bin}{raw}[33] total $d->{$bin}{raw}[34] (%0.2f $scales[$scale])\n",
              $d->{$bin}{name}, $total;

            my @vals;
            for ( my $j = 0 ; $j < 32 ; $j++ ) {
                if ( $d->{$bin}{raw}[$j] or $conf{show_all_stats} > 1 ) {
                    push @vals,
                      sprintf '%9s: %10d',
                      $bin_names[$j], $d->{$bin}{raw}[$j];

                    if ( $#vals == 2 ) {
                        $ret .= sprintf "  %s\n", join( " ", @vals );
                        undef @vals;
                    }
                }
            }
            if ( $#vals != -1 ) {
                $ret .= sprintf "  %s\n", join( " ", @vals );
                undef @vals;
            }
        }
    }
    return $ret;
}

# These must stay in the correct order, that is the order they appear in
# shared memory.
my @stat_types = qw(Counter Tally Bin Attribute);

my @display_order = qw(Attribute Counter Tally Bin);

my %stat_types2 = (
    Counter => {
        size      => '1',
        displayfn => \&estats_show_counter,
        sumfn     => \&estats_sum_counter
    },
    Tally => {
        size      => '3',
        displayfn => \&estats_show_tally,
        sumfn     => \&estats_sum_tally
    },
    Bin => {
        size      => '35',
        displayfn => \&estats_show_bin,
        sumfn     => \&estats_sum_bin
    },
    Attribute => {
        size      => '1',
        displayfn => \&estats_show_attr,
        sumfn     => \&estats_sum_attr
    },
);

sub estats_parse_header {
    my ($block) = @_;
    my @a = split $COMMA, $block;

    my @header;

    if ( $a[0] ne 'ELAN STATS' or $a[1] ne 'falcon' ) {
        return;
    }

    my $index;
    for ( $index = 0 ; $index < 4 ; $index++ ) {
        $header[$index] = $a[$index];
    }

    while ( $index < $#a ) {
        $header[ $a[$index] ] = $a[ $index + 1 ];
        $index += 2;
    }

    return \@header;
}

# Convert from subsystem ID to name.
sub estats_get_sub_name {
    my ( $id, $header ) = @_;
    $id *= 2;
    $id += 4;
    return $header->[$id];
}

# Convert from subsystem ID and stat type # to count.
sub estats_get_sub_stat_count {
    my ( $id, $type, $header ) = @_;

    # Check for an invalid subsystem number.
    if ( $id >= _hex $header->[3] ) {
        return 0;
    }

    # Check for an invalid stat type.
    if ( $type >= _hex $header->[2] ) {
        return 0;
    }

    # Skip over the four entry header and expand.
    $id *= 2;
    $id += 4;

    # Move from subsystem name to offset.
    $id++;

    # Follow the offset.
    $id = $header->[$id];

    # Move to the correct type.
    $id += $type;
    return $header->[$id];
}

# Params:
# $id              Index of this subsystem.
# $type            This stat type.
# $idx             Number of this stat.
sub estats_get_sub_stat_name {
    my ( $id, $type, $idx, $header ) = @_;

    # Check for an invalid subsystem number.
    if ( $id >= _hex $header->[3] ) {
        return 0;
    }

    # Check for an invalid stat type.
    if ( $type >= _hex $header->[2] ) {
        return 0;
    }

    # Skip over the four entry header and expand.
    $id *= 2;
    $id += 4;

    # Move from subsystem name to offset.
    $id++;

    # Follow the offset.
    $id = $header->[$id];

    my $offset = $id;

    # Header[2] is the number of stats type's.  4 currently.
    $id += _hex $header->[2];

    for ( my $i = 0 ; $i < $type ; $i++ ) {
        $id += $header->[ $offset + $i ];
    }

    return $header->[ $id + $idx ];
}

sub estats_find_rail {
    my $r = shift;

    my $rail = _hex $r;

    if ( $rail == -1 ) {
        return 'ELAN_RAIL_ALL';
    } else {
        return $rail;
    }
}

sub estats_parse_content {
    my ( $block, $header ) = @_;
    my @a = split $COMMA, $block;
    my $index = 0;

    my @raw_data;

    return if ( $#a < 5 );

    for ( $index = 0 ; $index < 4 ; $index++ ) {
        $raw_data[$index] = _hex( $a[$index] );
    }

    while ( $index < $#a ) {
        $raw_data[ $a[$index] ] = $a[ $index + 1 ];
        $index += 2;
    }

    my %process_details;

    $process_details{vp}      = $raw_data[0];
    $process_details{nvp}     = $raw_data[1];
    $process_details{localid} = $raw_data[2];
    $process_details{nlocal}  = $raw_data[3];

    my $instbase = 4;

    while ( $instbase != 0 ) {
        my $sysid = _hex( $raw_data[$instbase] );
        my $sysname = estats_get_sub_name( $sysid, $header );

        my %inst;

        $inst{sysid}      = _hex( $raw_data[$instbase] );
        $inst{name}       = estats_get_sub_name( $sysid, $header );
        $inst{id}         = _hex $raw_data[ $instbase + 1 ];
        $inst{handle}     = $raw_data[ $instbase + 2 ];
        $inst{stats}      = _hex $raw_data[ $instbase + 6 ];
        $inst{rail}       = estats_find_rail( $raw_data[ $instbase + 4 ] );
        $inst{next}       = _hex $raw_data[ $instbase + 5 ];
        $inst{valid}      = _hex $raw_data[ $instbase + 3 ];
        $inst{debugFlags} = $raw_data[ $instbase + 7 ];

        if ( $inst{stats} ) {
            my %stats;

            my $offset = $inst{stats};

            for ( my $type = 0 ; $type < $#stat_types + 1 ; $type++ ) {
                my $typename = $stat_types[$type];
                my $count =
                  estats_get_sub_stat_count( $inst{sysid}, $type, $header );

                next if $count == 0;
                my %type;
                for ( my $idx = 0 ; $idx < $count ; $idx++ ) {
                    my %data;
                    my @raw;
                    for (
                        my $value = 0 ;
                        $value < $stat_types2{$typename}{size} ;
                        $value++
                      )
                    {
                        $raw[$value] = _hex $raw_data[$offset];
                        $offset++;
                    }
                    $data{name} =
                      estats_get_sub_stat_name( $inst{sysid}, $type, $idx,
                        $header );
                    $data{raw} = \@raw;

                    $type{ $data{name} } = \%data;
                }
                $stats{$typename} = \%type;
            }
            $inst{statistics} = \%stats;
        } else {
            $process_details{complete} = 0;
        }

        $instbase = $inst{next};

        delete $inst{stats};
        delete $inst{next};
        delete $inst{sysid};
        delete $inst{debugFlags} if ( !$inst{debugFlags} );

        $process_details{subsystems}{ $inst{name} }{ $inst{id} } = \%inst;

    }

    # Work out if there is missing data.
    if (
        defined $process_details{subsystems}{Core}{1}{statistics}{Counter}
        {Overflow} )
    {
        if ( not defined $process_details{complete} ) {
            if ( $process_details{subsystems}{Core}{1}{statistics}{Counter}
                {Overflow}{raw}[0] == 0 )
            {
                $process_details{complete} = 1;
            } else {
                $process_details{complete} = 0;
            }
        }
    }

    return \%process_details;
}

sub estats_total {
    my ($data_structures_aref) = @_;

    # Make an initial total by just copying the first set of stats carte
    # blance.
    my $summed_structure = dclone( $data_structures_aref->[0] );

    for ( my $cr = 1 ; $cr <= $#{$data_structures_aref} ; $cr++ ) {
        my $current_structure = $data_structures_aref->[$cr];

        # Copy the vp and nvp information, we might miss new entries in
        # current_structure but for the time being all entries are known to
        # be common.  XXX: This assertion no longer holds true, {complete}
        # is only defined where it is known and has values 0 and 1.  Having
        # said that it will all work though as if it's value is not known
        # it can't be 1 which is the only value we care about.
        foreach my $header ( keys %{$summed_structure} ) {
            next if ( $header eq 'subsystems' );
            if ( defined $summed_structure->{$header}
                and $summed_structure->{$header} ne
                $current_structure->{$header} )
            {
                $summed_structure->{$header} = undef;
            }
        }

        #add to each set of stats if it exists, else clone the new set
        foreach my $name ( keys %{ $current_structure->{subsystems} } ) {
            if ( $summed_structure->{subsystems}{$name} ) {
                foreach
                  my $id ( keys %{ $current_structure->{subsystems}{$name} } )
                {
                    if ( $summed_structure->{subsystems}{$name}{$id} ) {

                        next
                          unless (
                            defined $current_structure->{subsystems}{$name}{$id}
                            {statistics} );

                        if (
                            not( $summed_structure->{subsystems}{$name}{$id}
                                {statistics} )
                          )
                        {

                            $summed_structure->{subsystems}{$name}{$id}
                              {statistics} = dclone(
                                $current_structure->{subsystems}{$name}{$id}
                                  {statistics} );
                            next;
                        }

                        my %current_stat =
                          %{ $current_structure->{subsystems}{$name}{$id}
                              {statistics} };

                        my %summed_stat =
                          %{ $summed_structure->{subsystems}{$name}{$id}
                              {statistics} };

                        #add to each type of stats if it exists, else copy
                        #the new set
                        foreach my $stat_type ( keys %current_stat ) {
                            if ( $summed_stat{$stat_type} ) {

                                foreach my $stat_name (
                                    keys %{ $current_stat{$stat_type} } )
                                {
                                    if (
                                        $summed_stat{$stat_type}->{$stat_name} )
                                    {

                                #do the adding up correctly for the type of stat
                                        $summed_stat{$stat_type}->{$stat_name} =
                                          $stat_types2{$stat_type}{sumfn}(
                                            $current_stat{$stat_type}
                                              ->{$stat_name},
                                            $summed_stat{$stat_type}
                                              ->{$stat_name}
                                          );
                                    } else {
                                        $summed_stat{$stat_type}->{$stat_name} =
                                          dclone( $current_stat{$stat_type}
                                              ->{$stat_name} );
                                    }
                                }
                            } else {
                                $summed_stat{$stat_type} =
                                  dclone( $current_stat{$stat_type} );
                            }
                        }
                    } else {
                        $summed_structure->{subsystems}{$name}{$id} = dclone(
                            $current_structure->{subsystems}{$name}{$id} );
                    }
                }
            } else {
                $summed_structure->{$name} =
                  dclone( $current_structure->{$name} );
            }
        }
    }

    return $summed_structure;
}

# Convert from long to terse stats.
sub estats_summarise {

    my $datastructure = shift;
    my %ret           = (
        Bin     => 0,
        Counter => 0,
        Tally   => 0
    );
    if ( defined $datastructure->{vp} ) {
        $ret{vp} = $datastructure->{vp};
    }
    foreach my $subsystem ( keys %{ $datastructure->{subsystems} } ) {
        foreach my $id ( keys %{ $datastructure->{subsystems}{$subsystem} } ) {
            my $statistics =
              $datastructure->{subsystems}{$subsystem}{$id}{statistics};
            foreach my $bin ( keys %{ $statistics->{Bin} } ) {

                #Bin has a total value so just add that
                $ret{Bin} += $statistics->{Bin}{$bin}{raw}[34];
            }
            foreach my $counter ( keys %{ $statistics->{Counter} } ) {
                $ret{Counter} += $statistics->{Counter}{$counter}{raw}[0];
            }
            foreach my $tally ( keys %{ $statistics->{Tally} } ) {
                $ret{Tally} += $statistics->{Tally}{$tally}{raw}[0];
            }
        }
    }
    return \%ret;
}

sub estats_summarise_many {
    my $many = shift;
    my @ret;
    foreach my $single ( @{$many} ) {
        push @ret, estats_summarise($single);
    }
    return \@ret;
}

sub estats_collapse_summaries {
    my $summaries = shift;
    my %ret       = (
        Bin     => 0,
        Counter => 0,
        Tally   => 0
    );
    foreach my $summary ( @{$summaries} ) {
        foreach my $key ( keys %ret ) {
            $ret{$key} += $summary->{$key};
        }
    }
    return \%ret;
}

sub estats_display_hash {
    my $hash = shift;
    format WITH_VP =
vp @>>>> Counter @>>>>>>>>> Tally @>>>>>>>>> Bin @>>>>>>>>>>>>>>
$hash->{vp}, $hash->{Counter}, $hash->{Tally}, $hash->{Bin}
.
    format WITHOUT_VP =
Counter @>>>>>>>>> Tally @>>>>>>>>> Bin @>>>>>>>>>>>>>>
$hash->{Counter}, $hash->{Tally}, $hash->{Bin}
.
    local $~;
    if ( defined $hash->{vp} ) {
        $~ = 'WITH_VP';
    } else {
        $~ = 'WITHOUT_VP';
    }
    write STDOUT;
    return;
}

sub estats_display_hashes {
    my ( $hashes, $sort, $reverse ) = @_;
    my $ret = $EMPTY_STRING;

    my $rev = $reverse;

    $rev = not $rev if ( $sort eq 'vp' );

    if ($rev) {
        foreach my $e ( sort { $a->{$sort} <=> $b->{$sort} } ( @{$hashes} ) ) {
            $ret .= estats_display_hash($e);
        }
    } else {
        foreach
          my $e ( reverse sort { $a->{$sort} <=> $b->{$sort} } ( @{$hashes} ) )
        {
            $ret .= estats_display_hash($e);
        }
    }
    return $ret;
}

# FIXME:  This function really should be merged with as show_inst...
sub estats_show_name {
    my ( $des, $stats ) = @_;

    if ( not defined $des ) {
        return estats_show_inst($stats);
    }

    my @req = split( "\\.", $des );

    my $ret = $EMPTY_STRING;

    foreach my $name2 ( sort keys %{ $stats->{subsystems} } ) {
        my $name = $stats->{subsystems}{$name2};

        next unless ( lc $name2 eq lc $req[0] );

        foreach my $id2 ( sort { $a <=> $b } keys %{$name} ) {
            my $sis = $name->{$id2};

            next if ( $#req > 0 and $sis->{id} ne $req[1] );

            if ( $#req < 2 ) {
                $ret .=
"Subsystem '$sis->{name}' id: $sis->{id}  Handle: $sis->{handle} rail: $sis->{rail}\n";
            }

            foreach my $type (@display_order) {
                next unless defined $sis->{statistics}{$type};

                if ( $#req > 1 ) {

                    foreach
                      my $s_name ( sort keys %{ $sis->{statistics}{$type} } )
                    {
                        next if ( $#req > 1 and lc $s_name ne lc $req[2] );
                        $ret .= "@{$sis->{statistics}{$type}{$s_name}{raw}}\n";
                    }
                } else {
                    if ( defined $stat_types2{$type}{displayfn} ) {
                        $ret .=
                          $stat_types2{$type}{displayfn}(
                            $sis->{statistics}{$type} );
                    }
                }
            }
        }
    }
    return $ret;
}

sub estats_show_inst {
    my ($stats) = @_;

    my $ret;

    if ( defined $stats->{vp} ) {
        $ret = "This is vp $stats->{vp}/$stats->{nvp}\n";
    } else {
        $ret = "Statistics for a $stats->{nvp} process job\n";
    }

    foreach my $name2 ( sort keys %{ $stats->{subsystems} } ) {
        my $name = $stats->{subsystems}{$name2};

        foreach my $id2 ( sort { $a <=> $b } keys %{$name} ) {
            my $sis = $name->{$id2};

            $ret .=
"Subsystem '$sis->{name}' id: $sis->{id}  Handle: $sis->{handle} rail: $sis->{rail}\n";

            if ( not defined $sis->{statistics} ) {
                $ret .= "no statistics recorded.\n";
                next;
            }

            foreach my $type (@display_order) {
                next unless defined $sis->{statistics}{$type};

                if ( defined $stat_types2{$type}{displayfn} ) {
                    $ret .=
                      $stat_types2{$type}{displayfn}(
                        $sis->{statistics}{$type} );

                }
            }
        }
    }
    return $ret;
}

sub estats_read_stats {
    my @data = @_;

    my $header = estats_parse_header( shift @data );

    return unless $header;

    my @out;
    foreach my $vp (@data) {
        my $parsed = estats_parse_content( $vp, $header );
        if ( defined $parsed ) {
            push @out, $parsed;
        }
    }

    return \@out;
}

sub estats_show_stats {
    my $d = shift;

    # This function is slightly delicate, the --full-report option calls
    # this function with $stats_total and $group set.

    # What to do about the -r option: If it's set then display individual
    # results for the given vp's only, if it's not set then display a total
    # for everyone.

    if ( not $d ) {
        print "QsNet Statistics not valid\n";
        return;
    }

    my $stats_total = 0;
    my $group       = 0;

    if ($stats_total) {

        if ( $conf{stats_short} ) {
            my $new;
            if ( defined $rank_rng ) {
                my @ret;
                my $rng = rng_dup($rank_rng);
                while ( defined( my $rank = rng_shift($rng) ) ) {
                    if ( defined $d->[$rank] ) {
                        push @ret, estats_summarise( $d->[$rank] );
                    } else {
                        my $vps = $#{$d} + 1;
                        print "Invalid rank $rank (0 to $vps)\n";
                    }
                }
                $new = \@ret;
            } else {
                $new = estats_summarise_many($d);
            }

            estats_display_hashes( $new, $conf{stats_sort_key},
                $conf{stats_reverse} );
            return;
        }

        if ( defined $rank_rng ) {
            my $rng = rng_dup($rank_rng);
            while ( defined( my $rank = rng_shift($rng) ) ) {
                if ( defined $d->[$rank] ) {
                    print estats_show_name( $conf{stats_name}, $d->[$rank] );
                } else {
                    my $vps = $#{$d} + 1;
                    print "Invalid rank $rank (0 to $vps)\n";
                }
            }
        } else {
            print estats_show_name( $conf{stats_name}, estats_total($d) );
        }
    }

    if ($group) {
        print group_status($d);
    }
    return;
}

###############################################################################
#
# Group deadlock detection
#
###############################################################################

sub group_status_helper {
    my $str        = shift;    # tagged onto the end of the line.
    my $possessive = shift;    # syntax to use (possessive/attributive)
    my $size       = shift;    # size of the group
    my @identical  = (@_);     # member list
    my $ret;
    my $sstr = defined $size ? " (size $size)" : $EMPTY_STRING;

    my $members = 'members';
    my $are     = 'are';
    my $have    = 'have';

    if ( $#identical == 0 ) {
        $members = 'member';
        $are     = 'is';
        $have    = 'has';
    }

    if ($possessive) {
        $are = $have;
    }

    $ret .= sprintf "Group $members %s$sstr $are $str.\n",
      rng_convert_to_user( rng_create_from_array(@identical) );

    return $ret;
}

sub group_status {
    my $data_structures_aref = shift;

    my %ad;

    my %tg;

    if ( $#target_groups != -1 ) {
        foreach my $gid (@target_groups) {
            $tg{$gid}++;
        }
    }

    # Loop over each vp...
    foreach my $dataset ( @{$data_structures_aref} ) {

        # Loop over each group within the process.
        foreach my $gid ( keys %{ $dataset->{subsystems}{Group} } ) {

            if ( $#target_groups != -1 ) {
                next unless defined $tg{$gid};
            }

            my $str;

            my $this_group = $dataset->{subsystems}{Group}{$gid};

            my $ident = $dataset->{vp};

            if ( $this_group->{statistics} ) {

                # XXX: Why is this first test here,
                if (    $this_group->{statistics}{Attribute}
                    and $this_group->{statistics}{Attribute}{Self} )
                {
                    $ident = $this_group->{statistics}{Attribute}{Self}{raw}[0];
                    $ad{$gid}{size} =
                      $this_group->{statistics}{Attribute}{Size}{raw}[0];
                    $ad{$gid}{map}[$ident] = $dataset->{vp}
                      if ( $conf{show_group_members} );
                }

                $ad{$gid}{idents}{$ident}{statistics}++;

                foreach my $tally ( keys %{ $this_group->{statistics}{Tally} } )
                {
                    my $name = $this_group->{statistics}{Tally}{$tally}{name};
                    my $number =
                      $this_group->{statistics}{Tally}{$tally}{raw}[0];
                    my $active =
                      $this_group->{statistics}{Tally}{$tally}{raw}[1];
                    if ( $active != 0 ) {
                        $ad{$gid}{active}{$name}++;
                        $ad{$gid}{idents}{$ident}{active}{$name} = $number;
                    } else {
                        $ad{$gid}{idents}{$ident}{inactive}{$name} = $number;
                    }
                }
            }
            $ad{$gid}{idents}{$ident}{valid} = $this_group->{valid};
        }
    }

    my $ret = $EMPTY_STRING;
    my $missing_self;
    my $i_count = 0;    # Interesting groups.
    my $d_count = 0;    # Destroyed groups.
    foreach my $gid ( sort { $a <=> $b } keys %ad ) {

        if ( $#target_groups != -1 ) {
            next unless defined $tg{$gid};
        }

        my $gstr = "Information for group '$gid'\n";

        # Maybe show the group members, hope that the user doesn't turn
        # this on unless also setting target_groups!
        if ( $conf{show_group_members} ) {
            $gstr .= "group has $ad{$gid}{size} members\n";
            if ( defined $ad{$gid}{size} and $gid != 1 ) {
                for ( my $ident = 0 ; $ident < $ad{$gid}{size} ; $ident++ ) {
                    $gstr .=
                      "group member[$ident] => vp[$ad{$gid}{map}[$ident]]\n";
                }
            }
        }

        my $gone;
        {
            my @invalid;
            foreach my $ident ( sort keys %{ $ad{$gid}{idents} } ) {
                if ( $ad{$gid}{idents}{$ident}{valid} == 0 ) {
                    push @invalid, $ident;
                }
            }
            if ( $#invalid != -1 ) {
                if ( $conf{show_all_groups} ) {
                    $ret .= $gstr
                      . group_status_helper( 'showing the group as removed',
                        0, $ad{$gid}{size}, @invalid );
                    $gstr = $EMPTY_STRING;
                }
                if ( $#invalid == ( $ad{$gid}{size} - 1 ) ) {
                    $gone++;
                    $d_count++;
                }
            }
        }
        next if $gone;

        # Find and report groups which don't have statistics
        {
            my @identical;
            foreach my $ident ( sort keys %{ $ad{$gid}{idents} } ) {
                push @identical, $ident
                  unless ( $ad{$gid}{idents}{$ident}{statistics} );
            }
            if ( $#identical != -1 ) {
                $missing_self++;
                if ( $conf{show_all_groups} ) {
                    $ret .= $gstr
                      . group_status_helper(
                        'no statistics for this group *(1)',
                        1, $ad{$gid}{size}, @identical );
                    $gstr = $EMPTY_STRING;
                } else {
                    $gstr .=
                      group_status_helper( 'no statistics for this group *(1)',
                        1, $ad{$gid}{size}, @identical );
                }
            }
        }

        if ( $ad{$gid}{active} ) {
            $i_count++;

            # For all collective calls which we are interested in
            foreach my $s ( keys %{ $ad{$gid}{active} } ) {
                my %active;
                my %inactive;

                foreach my $ident ( keys %{ $ad{$gid}{idents} } ) {
                    if ( defined $ad{$gid}{idents}{$ident}{active}
                        and $ad{$gid}{idents}{$ident}{active}{$s} )
                    {
                        my $number = $ad{$gid}{idents}{$ident}{active}{$s};
                        push @{ $active{$number} }, $ident;
                    } elsif ( $ad{$gid}{idents}{$ident}{inactive}{$s} ) {
                        my $number = $ad{$gid}{idents}{$ident}{inactive}{$s};
                        push @{ $inactive{$number} }, $ident;
                    }
                }
                foreach my $number ( sort keys %active ) {
                    $ret .= $gstr
                      . group_status_helper( "in call $number to $s",
                        0, $ad{$gid}{size}, @{ $active{$number} } );
                    $gstr = $EMPTY_STRING;

                }
                foreach my $number ( sort keys %inactive ) {
                    $ret .= group_status_helper( "completed call $number to $s",
                        1, $ad{$gid}{size}, @{ $inactive{$number} } );
                }
            }
        } else {
            next unless ( $conf{show_all_groups} );
        }

        {
            my @inactive;
            foreach my $ident ( sort keys %{ $ad{$gid}{idents} } ) {
                if ( $ad{$gid}{idents}{$ident}{statistics}
                    and not defined $ad{$gid}{idents}{$ident}{active} )
                {
                    push @inactive, $ident;
                }
            }
            if ( $#inactive != -1 ) {
                $ret .= $gstr
                  . group_status_helper( 'not in a call to the collectives',
                    0, $ad{$gid}{size}, @inactive );
                $gstr = $EMPTY_STRING;
            }
        }
    }

    my $count = keys %ad;

    if ( $count == 1 ) {
        my $use_str = ( $i_count == 1 ) ? $EMPTY_STRING : ' not';
        $ret .= "Total: $count group which is$use_str in use.\n";
    } else {
        my $d_str = ( $d_count == 1 ) ? 'is' : 'are';
        my $i_str = ( $i_count == 1 ) ? 'is' : 'are';
        $ret .=
"Total: $count groups of which $d_count $d_str destroyed and $i_count $i_str in use.\n";
    }

    if ($missing_self) {
        $ret .= "\n(1) Groups that have no statistics are reported by vp\n";
        $ret .= "rather than group id\n";
    }

    return "$ret";
}

###############################################################################
#
# Local (per node) stats.
#
###############################################################################

sub local_stats_from_job {
    my $job = shift;

    print "Showing local job $job\n";

    my $key = rms_job_to_key($job);

    if ( not defined $key ) {
        print "Cannot find key for local job $job\n";
        return;
    }

    my @data;
    open my $PCMD, '-|', "edb -k $key --stats-raw 2>/dev/null"
      or confess "$prog: cant open file: $!\n";
    local $/ = "\n\n";
    while (<$PCMD>) {
        s/\n//g;
        push @data, $_;
    }
    close $PCMD;

    my $s = estats_read_stats(@data);

    # $stats_total = 1;

    estats_show_stats($s);
    return;
}

# Show stats for all jobs on this node.
sub local_stats {
    my @files = slurp_dir('/proc/rms/programs');

    foreach my $job (@files) {
        next if ( $job eq '..' );
        next if ( $job eq '.' );

        local_stats_from_job($job);

    }
    return;
}

###############################################################################
#
# Stack trace tree compression.
#
###############################################################################

#
# Compare two lists-o-strings
#	\@l1 (IN)	list1
#	\@l2 (IN)	list2
#	RETURN		1 if match, 0 if not
#
sub cmp_list {
    my ( $l1, $l2 ) = @_;

    if ( $#{$l1} != $#{$l2} ) {
        return 0;
    }

    foreach my $i ( 0 .. $#{$l1} ) {
        if ( !defined( ${$l2}[$i] ) || ${$l1}[$i] ne ${$l2}[$i] ) {
            return 0;
        }
    }

    return 1;
}

###############################################################################
#
# RMS support.
#
###############################################################################

sub find_exe {
    my $name = shift;
    foreach my $dir ( split $COLON, $ENV{PATH} ) {
        return 1 if ( -x "$dir/$name" );
    }
    return 0;
}

sub rms_is_installed {
    return ( find_exe('prun') and find_exe('rmsquery') );
}

sub rms_get_jobs {
    my $user = shift;
    my @res =
`rmsquery "select jobs.name from jobs,resources where jobs.status=\'running\' and jobs.resource = resources.name and resources.username=\'$user\'"`;
    chomp @res;
    return @res;
}

sub rms_job_is_running {
    my $job    = shift;
    my $status = `rmsquery "select status from jobs where name=\'$job\'"`;
    chomp $status;
    return ( $status eq 'running' );
}

sub rms_job_to_key {
    my $job = shift;
    return ( $job << 9 ) - 1;
}

sub rms_setup_pcmd {
    my $job = shift;

    my $res = rms_job_to_resource($job);

    my $ncpus = rms_job_to_ncpus($job);

    my $nhosts = rms_job_to_nhosts($job);

    if ( $res eq $EMPTY_STRING ) {
        print "Job '$job' doesn't have a associated resource\n";
        return;
    }

    # Try to prevent zombie jobs, fairly rare but I have seen nodes run
    # different versions of edb which can cause problems XXX: Fixme.  This
    # isn't high enough.
    if ( $conf{prun_exittimeout} != 0 ) {
        $ENV{RMS_EXITTIMEOUT} = $conf{prun_exittimeout};
    }

    if ( $conf{prun_timeout} != 0 ) {
        $ENV{RMS_TIMELIMIT} = $conf{prun_timeout};
    }

    {

        # Work around a couple of bugs in RMS the first one is really old
        # and was there for a while, the second one is limited to
        # 'qsrmslibs-2.82-15'
        my $partition = rms_res_to_partition($res);
        $ENV{RMS_PARTITION}  = $partition;
        $ENV{RMS_RESOURCEID} = "$partition.$res";
    }

    my $cmd = "prun -i /dev/null -T $res";

    return ( $cmd, $ncpus, $nhosts );
}

# Not exported...
sub rms_job_to_resource {
    my $job = shift;
    my $res = `rmsquery "select resource from jobs where name=\'$job\'"`;
    chomp $res;
    return $res;
}

sub rms_job_to_ncpus {
    my $job   = shift;
    my $cpus  = `rmsquery "select cpus from jobs where name=\'$job\'"`;
    my $nodes = `rmsquery "select nodes from jobs where name=\'$job\'"`;

    chomp $cpus;
    chomp $nodes;

    my $ncpus = 0;

    my @c =
      map { $_ =~ /(\d+)-(\d+)/ ? $2 - $1 + 1 : 1 } ( split $SPACE, $cpus );

    my @n =
      map { $_ =~ /(\d+)-(\d+)/ ? $2 - $1 + 1 : 1 } ( split $SPACE, $nodes );

    for ( my $idx = 0 ; $idx <= $#n ; $idx++ ) {
        $ncpus += $n[$idx] * $c[$idx];
    }

    print "extracted $ncpus from $cpus and $nodes\n" if $conf{verbose} > 1;

    return $ncpus;
}

sub rms_job_to_nhosts {
    my $job      = shift;
    my $nodespec = `rmsquery "select hostnames from jobs where name=\'$job\'"`;

    chomp $nodespec;
    my $i;
    my @nodelist;
    my $prefix;
    my $suffix;

    # deal with multiple entries
    foreach ( split $SPACE, $nodespec ) {
        if (m/([^\[]+)\[([0-9-,]+)\]([^\[]*)/) {
            $prefix = $1;
            $suffix = $3;

            foreach ( split $COMMA, $2 ) {
                if ( !m/([0-9]+)-?([0-9]+)?/ ) {
                    print "malformed nodespec '$_'\n";
                    exit 1;
                }

                if ( defined $2 ) {

                    # square braces with range, eg 'machine[0-3]'
                    for ( $i = $1 ; $i <= $2 ; $i++ ) {
                        push @nodelist, $prefix . $i . $suffix;
                    }
                } else {

                    # no range, just suffix
                    push @nodelist, $prefix . $1 . $suffix;
                }
            }
        } else {

            # no square braces, just node name, eg 'machine0'
            if ( !m/([^\[]+)([0-9]+)([^\[]*)/ ) {
                print "malformed nodespec '$_'\n";
                exit 1;
            }

            push @nodelist, $1 . $2 . $3;
        }
    }

    return $#nodelist + 1;
}

sub rms_res_to_partition {
    my $res  = shift;
    my $part = `rmsquery "select partition from resources where name=\'$res\'"`;
    chomp $part;
    return $part;
}

###############################################################################
#
# Slurm support.
#
###############################################################################

sub slurm_is_installed {
    return ( find_exe('srun') and find_exe('squeue') and find_exe('scontrol') );
}

sub slurm_get_jobs {
    my $user = shift;
    my @res  = slurp_cmd("squeue -t running -u $user -h -o %i");
    chomp @res;
    return @res;
}

# Query the process count for the "step" as that's how many processes we
# are going to be looking for.
sub slurm_job_to_ncpus {
    my $job   = shift;
    my $s     = "$job." . $conf{slurm_job_step};
    my @steps = slurp_cmd("squeue -s $s -o %i,%A");

    # The %A option is new so ensure we have the TASKS output before we
    # believe what we see here...  Mind you %A is several years old now so
    # if it's not there we probably can't do anything anyway.
    my $tasks;
    my $have_tasks = 0;
    chomp @steps;
    foreach my $step (@steps) {
        my ( $step, $cpus ) = split $COMMA, $step;
        $tasks      = $cpus if ( $step eq $s );
        $have_tasks = 1     if ( $cpus eq 'TASKS' );
    }
    return $tasks if $have_tasks;
    return;
}

# Query the nodecount for the "job" as that is what we shall be running on.
sub slurm_job_to_nodecount {
    my $job  = shift;
    my @jobs = slurp_cmd('squeue -o %i,%D');

    chomp @jobs;
    foreach my $step (@jobs) {
        my ( $sj, $ncount ) = split $COMMA, $step;
        return $ncount if ( $sj eq $job );
    }
    return;
}

# Query the node list for the "step" which isn't the same as the node list
# for the job, care should be taken if using this function to ensure this
# is correct.  This functions isn't used currently.
sub slurm_job_to_nodelist {
    my $job   = shift;
    my $s     = "$job." . $conf{slurm_job_step};
    my @steps = slurp_cmd("squeue -s $s -o %i,%N");

    chomp @steps;
    foreach my $step (@steps) {
        my ( $sj, $nlist ) = split $COMMA, $step;
        return $nlist if ( $sj eq $s );
    }
    return;
}

sub slurm_job_is_running {
    my $job    = shift;
    my @s      = slurp_cmd("squeue -h -j $job -o %T");
    my $status = lc $s[0];
    chomp $status;
    return ( $status eq 'running' );
}

sub slurm_setup_job {
    my $job = shift;

    # After we have selected a job id and decided to target it make a
    # best-attempt effort to pick a sensible step_id.  List all the
    # step ids slurm thinks are running and pick the first one.
    # Previously this value just defaulted to zero.
    if ( not defined $conf{slurm_job_step} ) {
        my @all_steps = slurp_cmd("squeue -s -o %i");
        my @valid_steps;
        foreach my $step (@all_steps) {
            chomp $step;
            next if $step eq "STEPID";
            my ( $job_id, $job_step ) = split $PERIOD, $step;
            next unless $job_id == $job;
            push @valid_steps, $job_step;
        }
        if (@valid_steps) {
            config_set_internal( 'slurm_job_step', $valid_steps[0] );
        } else {
            print
              "Unable to determine any valid job steps, assuming step id 0\n";
            config_set_internal( 'slurm_job_step', 0 );
        }
    }

    my $cpus = slurm_job_to_ncpus($job);
    my $nc   = slurm_job_to_nodecount($job);

    my %pcmd;
    $pcmd{nprocesses} = $cpus;
    $pcmd{nhosts}     = $nc;
    $pcmd{command}    = "srun --jobid=$job";
    return %pcmd;

}

###############################################################################
#
# Local support.
#
###############################################################################

sub local_get_jobs {
    my $user = shift;
    return get_process_list($user);
}

sub local_fd_get_jobs_real {
    my $user = shift;
    my $file = shift;

    my @pids = get_process_list($user);

    my @jobs;

    foreach my $pid (@pids) {
        my @fds = slurp_dir("/proc/$pid/fd");
        foreach my $fd (@fds) {
            my $target = readlink "/proc/$pid/fd/$fd";
            next unless $target;
            if ( $target eq $file ) {
                push @jobs, $pid;
                last;
            }
        }
    }

    return @jobs;
}

sub local_fd_get_jobs {
    my $user = shift;
    return local_fd_get_jobs_real( $user, $conf{local_fd_name} );
}

sub local_q_is_installed {
    return ( -d '/proc/qsnet' );
}

sub local_q_get_jobs {
    my $user = shift;
    return local_fd_get_jobs_real( $user, '/proc/qsnet/elan/user' );
}

sub local_job_is_running {
    my $job = shift;
    return ( -d "/proc/$job" );
}

sub local_setup_job {
    my $job = shift;

    my $hostname = hostname();
    my %pcmd;
    $pcmd{nprocesses}                 = 1;
    $pcmd{nhosts}                     = 1;
    $pcmd{process_data}{$hostname}{0} = $job;
    @{ $pcmd{host_list} } = $hostname;
    return %pcmd;
}

###############################################################################
#
# mpd support.
#
###############################################################################

sub mpd_is_installed {
    return ( find_exe('mpdlistjobs') and find_exe('mpdrun') );
}

sub mpd_get_data {
    my @out = slurp_cmd('mpdlistjobs');
    my %jobs;
    my $job;
    my $host;
    my $pid;
    foreach my $l (@out) {
        my ( $key, $value ) = split "= ", $l;
        next unless $value;
        $key =~ s/ //g;
        chomp $value;
        if ( $key eq 'jobid' ) {
            my ( $j, undef ) = split "@", $value;
            $job = $j;
        }
        if ( $key eq 'username' ) {
            $jobs{$job}{user} = $value;
        }
        if ( $key eq 'host' ) {
            $host = $value;
            $jobs{$job}{host}{$value}++;
        }
        if ( $key eq 'pid' ) {
            $pid = $value;
        }
        if ( $key eq 'rank' ) {
            $jobs{$job}{pids}{$host}{$value} = $pid;
            if (   ( not defined $jobs{$job}{lastproc} )
                or ( $value > $jobs{$job}{lastproc} ) )
            {
                $jobs{$job}{lastproc} = $value;
            }
        }
    }
    return \%jobs;
}

# There is a bug here I think, $user isn't used anywhere which is probably
# bad.
sub mpd_get_jobs {
    my $user = shift;

    my $d    = mpd_get_data();
    my @jobs = keys %{$d};
    return @jobs;
}

sub mpd_setup_job {
    my $job = shift;

    my $d = mpd_get_data();

    my @hosts = keys %{ $d->{$job}{host} };
    my $i     = @hosts;

    my ( $fh, $fn ) = tempfile('/tmp/padb.XXXXXXXX');
    foreach my $host (@hosts) {
        print {$fh} "$host:1\n";
    }
    close $fh;

    my $cmd = "mpdrun -machinefile $fn -np $i";

    my %pcmd;
    $pcmd{nprocesses}   = $d->{$job}{lastproc} + 1;
    $pcmd{nhosts}       = @hosts;
    $pcmd{process_data} = $d->{$job}{pids};
    $pcmd{command}      = $cmd;
    @{ $pcmd{host_list} } = @hosts;
    $pcmd{cleanup_cb}     = \&unlink_file;
    $pcmd{cleanup_handle} = $fn;

    return %pcmd;
}

sub unlink_file {
    my $file = shift;
    unlink $file;
    return;
}

###############################################################################
#
# pbs support.
#
###############################################################################

my %pbs_tabjobs;

sub pbs_is_installed {
    return find_exe('qstat');
}

# Load a list of jobs from a given server, saving the server and the host list
# for each one.
sub pbs_get_lqsub {
    my ( $user, $server ) = @_;
    my $cmd = "qstat -n -u $user \@$server";

    my $job = undef;

    my @output = slurp_cmd($cmd);
    foreach my $line (@output) {

# If we have previously matched a job (see below) then extract the hostlist.
# This line of outpuas has the form:
# " xn8/0*2+xn9/0*2+xn10/0*2"
# all we care about from this is the hostname (xn[8-10]) so split on '+' and then
# strip everything after the first '/'
        if ( defined $job ) {

            $line =~ s/^ +//;    # suppress blank in front of line
            $line =~ s/^\+//;    # suppress first + sign
            my @champs = split( /\+/, $line );    # split by '+'
            foreach my $word (@champs) {
                my ($host) = split "/", $word;
                push( @{ $pbs_tabjobs{$job}{hosts} }, $host );
            }
            $job = undef;
            next;
        }

# See if this line of output matches a job id, it has to be of the correct user and running.
# If this is a running job set $job to it's identifier so the code above can match the hostlist
# which will be on the next line of output.
        my @parts = split $SPACE, $line;
        if ( $#parts == 10 and $parts[1] eq $user and $parts[9] eq 'R' ) {
            my ( $job_id, $job_server ) = split $PERIOD, $parts[0];

            if ( defined $pbs_tabjobs{$job_id}{server} ) {
                printf("Warning, job $job_id exists on multiple servers\n");
                next;
            }
            $job = $job_id;

# This test is perfectly "safe" and should never fail apart from the case where the job id
# and the server name don't fit inside the 15 characters allowed.  This can be worked around
# by setting the -w flag which tells pbs_pro to print up to 30 characters but that doesn't
# work on Torque or plain pbs so only check this value if the string is shorter than that.
            if ( length $parts[0] < 15 and $job_server ne $server ) {
                printf("Warning, job is listed with unexpected server\n");
            }
            $pbs_tabjobs{$job_id}{nproc}  = $parts[6];
            $pbs_tabjobs{$job_id}{server} = $server;
        }
    }
    return;
}

sub pbs_get_data {
    my $user = shift;
    return \%pbs_tabjobs if ( keys %pbs_tabjobs != 0 );

    my @servers;

    # Chose a list of servers to use, if one is specified by the user
    # it will appear in $conf{pbs_server} here.  If one is not set
    # load a list of available ones and use that.  It may be possible
    # to have multiple jobs on different servers with the same jobid,
    # if that is the case detect it in pbs_get_lqsub() and warn the
    # user.  This will then force the user to expliciatly chose one
    # of the servers.
    if ( defined $conf{pbs_server} ) {
        push @servers, $conf{pbs_server};
    } else {
        my @handle = slurp_cmd('qstat -fB');
        foreach my $line (@handle) {
            next if ( $line =~ /^\s+/ );    # skip if line begin with space
            if ( $line =~ /Server:/ ) {
                $line =~ s/^ +//;           # take off space at start
                my @champs = split( /\s+/, $line );    # split buff by space
                push( @servers, $champs[1] );
            }
        }
    }

    foreach my $server (@servers) {
        pbs_get_lqsub( $user, $server );               # get job list by qsub
    }
    return \%pbs_tabjobs;
}

sub pbs_get_jobs {
    my $user = shift;

    my $d = pbs_get_data($user);

    my @jobs = keys %{$d};
    return @jobs;
}

sub pbs_setup_job {
    my $job = shift;
    my $d   = pbs_get_data($target_user);

    my @hosts = @{ $d->{$job}{hosts} };

    config_set_internal( 'pbs_server', $d->{$job}{server} );

    my %pcmd;

    $pcmd{nprocesses} = $d->{$job}{nproc};
    $pcmd{nhosts}     = @hosts;
    @{ $pcmd{host_list} } = @hosts;

    return %pcmd;
}

###############################################################################
#
# lsf-mpich2 wrapper  support
# the jobs launched by mpich2_wrapper thanks to mpirun.lsf and #BSUB -a mpich2
# The job submission file looks like:
#
##! /bin/bash
##BSUB -J "JOB_NAME"
##BSUB -o JOB_NAME.%J
##BSUB -n 4
##BSUB -e JOB_NAME_err.%J
##BSUB -a mpich2
#mpirun.lsf ./mpi_prog
#
# lsf-ompi-wrapper support thanks to #BSUB -a openmpi and mpirun.lsf.
# The job submission file looks like:
##! /bin/bash
##BSUB -J "PP_SNDRCV"
##BSUB -o PP_SNDRCV.%J
##BSUB -n 4
##BSUB -e PP_SNDRCVerr.%J
##BSUB -a openmpi
#mpirun.lsf ./pp_sndrcv_spbl
#
###############################################################################

my %lsfmpi_tabjobs;

sub lsfmpi_is_installed {
    return ( find_exe('mpirun.lsf')
          and ( find_exe('mpich2_wrapper') or find_exe('openmpi_wrapper') ) );
}

sub lsf_get_line_ppid {
    my ( $ppid, $rank_pid, $rank_ppid, @handle ) = @_;
    my $ret_line;
    my $pid;
    foreach my $line (@handle) {
        $line =~ s/^ +//;    # take off leading space
        my @champs = split( /\s+/, $line );
        if ( $champs[$rank_ppid] == $ppid ) {
            $pid      = $champs[$rank_pid];
            $ret_line = $line;
            last;
        }
    }
    return ( $ret_line, $pid );
}

sub lsfmpi_get_mpiport {
    my ( $host, $portpath ) = @_;
    my $portfound = 0;
    my $port;
    my @handle = slurp_remote_cmd( $host, "cat $portpath" );
    foreach my $line (@handle) {
        if ( $line =~ /TaskStarter/ ) {
            my @champs = split( " ", $line );
            foreach my $word (@champs) {
                if ( $word eq "-p" ) {  # don't use =~ because may take --prefix
                    $portfound = 1;
                    next;
                }
                if ( $portfound == 1 ) {
                    $port = $word;
                    last;
                }
            }
            last;
        }
    }
    return $port;
}

sub lsfmpi_get_mpiproc {
    my ( $ppid, $host, $job ) = @_;
    my $rank_pid  = 0;
    my $rank_ppid = 1;
    my $proc;
    my $path_file;
    my $count_line = 0;
    my $mode;

    #get ps from the leading host(the one that start mpirun.lsf)
    my @handle =
      slurp_remote_cmd( $host, "ps -o pid= -o ppid= -o cmd= -u $target_user" );

    $count_line = @handle;
    for ( my $i = 0 ; $i < $count_line ; $i++ ) {    # to avoid loop
        my ( $line, $pid );
        next if ( !defined $ppid );
        ( $line, $pid ) =
          lsf_get_line_ppid( $ppid, $rank_pid, $rank_ppid, @handle );
        next if ( !defined $line );
        if ( $line =~ /mpi/ && $line =~ /-configfile/ ) {
            my @champs = split( " ", $line );
            foreach my $word (@champs) {
                if ( $word eq "-configfile" ) {
                    $mode = 'mpich2';
                    next;
                }
                if ( defined $mode ) {
                    $path_file = $word;    # get path of -configfile
                    $proc      = $pid;
                    last;
                }
            }
            if ( $path_file =~ /$job\.newconf$/ )
            {    # format is .mpich2_wrapper.jobid.newconf
                last;
            }
            $path_file = undef;
        } elsif ( $line =~ /mpi/ && $line =~ /-app/ ) {
            my @champs = split( " ", $line );
            foreach my $word (@champs) {
                if ( $word eq "--app" ) {
                    $mode = 'openmpi';
                    next;
                }
                if ( defined $mode ) {
                    $path_file = $word;    # get path file of --app param
                    $proc      = $pid;
                    last;
                }
            }
            if ( $path_file =~ /$job$/ ) {    # format is .openmpi_appfile_jobid
                last;
            }
            $path_file = undef;
        } else {
            $ppid = $pid;
        }
    }
    return ( $proc, $path_file, $mode );
}

sub lsf_get_jobpgid {
    my ($jobid) = @_;
    my $resfound = 0;
    my @proc;
    my $cmd    = "bjobs -l $jobid ";
    my @handle = slurp_cmd($cmd);
    foreach my $line (@handle) {
        if ( $line =~ /Resource usage collected./i ) {
            $resfound = 1;
            next;
        }
        if ( $resfound == 1 ) {
            $line =~ s/^ +//;    # take off space at start
            if ( $line =~ /^PGID:/i ) {
                my @champs = split( " ", $line );
                my $pgid = $champs[1];
                chop($pgid) if ( $pgid =~ /;$/ );
                push( @proc, $pgid );
                my $firstpid = 0;
                foreach my $word (@champs) {
                    if ( $word =~ /^PIDs:/ ) {
                        $firstpid = 1;
                        next;
                    }
                    if ( $firstpid == 1 ) {
                        push( @proc, $word );
                    }
                }
                last;
            }
        }
    }
    return (@proc);
}

sub lsfmpi_get_hostport {
    my $job = shift;
    my $d   = lsfmpi_get_data();
    my $host;
    my $port;
    my $mpirunpid;
    my $path_port;
    my $lsf_mode;

    my @hosts = @{ $d->{$job}{hosts} } if ( defined $d->{$job}{hosts} );

    $host = $hosts[0] if ( defined $hosts[0] );

    #get the pgid of the job(first job pid)
    my @pgid = lsf_get_jobpgid($job);
    my $ppid = $pgid[0];

    #get the port of the leading proc (mpirun proc port)
    if ( defined $ppid and defined $host ) {
        ( $mpirunpid, $path_port, $lsf_mode ) =
          lsfmpi_get_mpiproc( $ppid, $host, $job );
        $d->{$job}{lsf_mode} = $lsf_mode;    # can be 'mpich2' or 'openmpi'
        $port = lsfmpi_get_mpiport( $host, $path_port )
          if ( defined($path_port) );
    }
    return ( $host, $mpirunpid, $port );
}

sub lsfmpi_get_lbjobs {
    my $jobidfound  = 0;
    my $found_title = 0;
    my $jobid;
    my $rank_jobid   = 0;
    my $rank_user    = 1;
    my $rank_stat    = 2;
    my $rank_ehost   = 5;
    my $rank_jobname = 6;
    my $cmd          = "bjobs -r -u $target_user ";
    my @output       = slurp_cmd($cmd);
    foreach my $line (@output) {
        $line =~ s/^ +//;    # suppress blank in front of line
        my @champs = split( /\s+/, $line );
        next if ( $champs[$rank_jobid] eq 'JOBID' );
        next if ( $#champs == -1 );    # empty line
        if ( $#champs != 0 ) {         # line with many fields is first line
            $jobid = undef;
            $jobid = $champs[$rank_jobid];
            my @ehosts = split( '\*', $champs[$rank_ehost] );
            $lsfmpi_tabjobs{$jobid}{nproc} = $ehosts[0];
            my $exec_host = $ehosts[1];
            push( @{ $lsfmpi_tabjobs{$jobid}{hosts} }, $exec_host )
              if ( defined($exec_host) );
        } elsif ( defined $jobid )
        {    # line with one field, should be continued line(exec_host)
            my @ehosts = split( '\*', $champs[0] );
            my $exec_host = $ehosts[1];
            chomp($exec_host);
            $lsfmpi_tabjobs{$jobid}{nproc} += $ehosts[0];    # nprocess
            push( @{ $lsfmpi_tabjobs{$jobid}{hosts} }, $exec_host );
        }
    }
}

sub lsfmpi_get_data {
    return \%lsfmpi_tabjobs if ( keys %lsfmpi_tabjobs != 0 );
    lsfmpi_get_lbjobs();    # get job list by bjobs
    return \%lsfmpi_tabjobs;
}

sub lsfmpi_get_jobs {
    my $user = shift;
    my @ret_jobs;
    my $d    = lsfmpi_get_data();
    my @jobs = keys %{$d};

    # filter other jobs that aren't launched by mpich2_wrapper
    # (for exemple by mpd; mpiexec; in the submitted job)
    # to do this we have criteria below
    # jobs launched by mpich2_wrapper will have -configfile parameter
    # jobs launched by ompi_wrapper will have --app parameter
    foreach my $job (@jobs) {
        my ( $server, $mpirpid, $port ) = lsfmpi_get_hostport($job);
        if ( defined($mpirpid) and defined($port) and defined($server) ) {
            $d->{$job}{server}  = $server;
            $d->{$job}{mpirpid} = $mpirpid;
            $d->{$job}{port}    = $port;
            push( @ret_jobs, $job );
        }
    }
    return @ret_jobs;
}

sub lsfmpi_setup_pcmd {
    my $job = shift;
    my $cmd;
    my $index = 0;
    my %pcmd;
    my $d = lsfmpi_get_data();

    my ( $server, $mpirpid, $port );

    $server  = $d->{$job}{server};
    $mpirpid = $d->{$job}{mpirpid};
    $port    = $d->{$job}{port};
    config_set_internal( 'lsf_mode',       $d->{$job}{lsf_mode} );
    config_set_internal( 'lsfmpi_server',  $server );
    config_set_internal( 'lsfmpi_mpirpid', $mpirpid );
    config_set_internal( 'lsfmpi_port',    $port );
    my @hosts = @{ $d->{$job}{hosts} };
    $pcmd{nprocesses} = $d->{$job}{nproc};
    $pcmd{nhosts}     = @hosts;
    @{ $pcmd{host_list} } = @hosts;

    return %pcmd;
}

sub get_pids_ppid {

    #   get all pids from ppid be careful about defunct
    my ( $ppid, $rank_pid, $rank_ppid, @handle ) = @_;
    my $pid;
    my @proc;
    foreach my $line (@handle) {
        $line =~ s/^ +//;    # take off leading space
        my @champs = split( /\s+/, $line );
        next if ( $champs[$rank_pid] eq 'PID' );
        if ( $champs[$rank_ppid] == $ppid ) {
            $pid = $champs[$rank_pid];
            if ( $line =~ /defunct/i ) {
                next;
            }
            push( @proc, $pid );
        }
    }
    return (@proc);
}

sub get_pids_fromport {

    #   get all pids from port -p host:port_num
    my ( $port, $rank_pid, $rank_ppid, $rank_cmd, @handle ) = @_;
    my $portfound = 0;
    my @proc;
    foreach my $line (@handle) {
        $line =~ s/^ +//;    # take off space at start
        my @champs = split( /\s+/, $line );
        my $cmd    = $champs[$rank_cmd];
        my $base   = basename($cmd);
        if ( $base eq "TaskStarter" ) {
            if ( $line =~ /$port/ ) {
                $portfound = 0;
                foreach my $word (@champs) {
                    if ( $word eq "-p" )
                    {        # don't use =~ because may take --prefix
                        $portfound = 1;
                        next;
                    }
                    if ( $portfound == 1 ) {
                        push( @proc, $champs[$rank_pid] ) if ( $word eq $port );
                        last;
                    }
                }
            }
        }
    }
    return @proc;
}

# open support.
#
###############################################################################

sub find_ompi_prefix {
    foreach my $dir ( split $COLON, $ENV{PATH} ) {
        next unless ( -x "$dir/ompi-ps" );
        my @d = split "/", $dir;
        pop @d;
        my $prefix = join q{/}, @d;
        return "--prefix $prefix";
    }
    return $EMPTY_STRING;
}

sub open_is_installed {
    return ( find_exe('ompi-ps') and find_exe('orterun') );
}

my %open_jobs;

sub open_get_data {

    # Simply return if called more than once.
    if ( keys %open_jobs != 0 ) {
        return;

    }

    my @out = slurp_cmd('ompi-ps');

    foreach my $l (@out) {
        chomp $l;
        next if ( $l eq $EMPTY_STRING );

        my @elems = split qr{\s*\|\s*}, $l;
        next unless @elems >= 4;

# We used to check for the number of elements here and then match the jobid seperatly but that doesn't
# work for all cases as sometimes Open-Mpi has extra checkpoint-restart related data on the end
# of each line changing the element count and hence the test to fail.  Now we check the element count
# is at least as long as we expect, we check that all fields that should be integers are actually integers
# and I've moved the regexp for matching the job ID into the line test.
# For performance reasons I've put the per-rank test before the per-job test as it triggers more often.
        if (    @elems >= 6
            and ( $elems[1] =~ m{\A\[\[(\d+)\,(\d+)\]\,(\d+)\]}x )
            and is_digit( $elems[2] )
            and is_digit( $elems[3] ) )
        {
            my $job  = $1;
            my $step = $2;
            my $rank = $3;
            my $pid  = $elems[3];
            my $host = $elems[4];
            $open_jobs{$job}{$step}{hosts}{$host}++;
            $open_jobs{$job}{$step}{ranks}{$host}{$rank} = $pid;

        } elsif ( @elems >= 4
            and ( $elems[0] =~ m{\A\[(\d+)\,(\d+)]\z}x )
            and is_digit( $elems[2] )
            and is_digit( $elems[3] ) )
        {
            my $nprocs = $elems[3];
            my $job    = $1;
            my $step   = $2;
            $open_jobs{$job}{$step}{nprocs} = $nprocs;
        }
    }
    return;
}

sub open_get_jobs {
    my $user = shift;

    open_get_data();
    return keys %open_jobs;
}

sub open_setup_job {
    my $job = shift;

    open_get_data();

    my $step = $conf{orte_job_step};
    if ( not defined $step ) {
        my @steps = keys %{ $open_jobs{$job} };

        my @ordered = sort { $a <=> $b } @steps;

        $step = $ordered[0];

    }

    if ( not defined $open_jobs{$job}{$step} ) {
        printf("Job $job (step $step) does not exist\n");
        return;
    }

    my @hosts = keys %{ $open_jobs{$job}{$step}{hosts} };
    my $i     = @hosts;

    my ( $fh, $fn ) = tempfile('/tmp/padb.XXXXXXXX');

    foreach my $host (@hosts) {
        print {$fh} "$host\n";
    }
    close $fh;

    my $prefix = find_ompi_prefix();
    my $cmd    = "orterun -machinefile $fn -np $i $prefix";

    my %pcmd;
    $pcmd{nprocesses}   = $open_jobs{$job}{$step}{nprocs};
    $pcmd{nhosts}       = @hosts;
    $pcmd{process_data} = $open_jobs{$job}{$step}{ranks};
    $pcmd{command}      = $cmd;
    @{ $pcmd{host_list} } = @hosts;
    $pcmd{cleanup_cb}     = \&unlink_file;
    $pcmd{cleanup_handle} = $fn;

    return %pcmd;

}

###############################################################################
#
# lsf support.
#
###############################################################################

sub lsf_is_installed {

    # Check for both LSF and RMS, I know LSF works in other ways but I
    # don't know how to launch jobs then...
    return ( find_exe('bjobs') and rms_is_installed() );
}

sub lsf_get_jobs {
    my $user = shift;

    my @jobs;

    my @out = slurp_cmd("bjobs -r -u $user");
    foreach my $l (@out) {
        my ( $job, $juser, $stat, $queue, $from, $exec, $name, $time ) =
          split $SPACE, $l;
        next if ( $job eq 'JOBID' );
        next unless ( defined $time );
        push @jobs, $job;
    }

    return @jobs;
}

# This is a little odd, lsf allocates a resource and then pruns (-n1) the
# users script inside that resource.  That script then calls prun which is
# the real parallel job, In essence then you get one resource and (at
# least) two jobs, padb needs to target the second one.  This is controlled
# by the -Olsf_job_offset option, the default being one.
sub lsf_setup_pcmd {
    my $job = shift;

    my $machine = `rinfo -m`;
    chomp $machine;
    my $query =
      "select name,ncpus from resources where batchid=\'$machine\@$job\'";
    my $result = `rmsquery "$query"`;

    my ( $res, $ncpus ) = split $SPACE, $result;

    my @out = slurp_cmd(
"rmsquery \"select name from jobs where jobs.resource=\'$res\' and status = \'running\' order by name\""
    );

    my $rjob;

    my $idx = $conf{lsf_job_offset};
    $idx = 1 if ( $idx > $#out );
    $rjob = $out[$idx];
    chomp $rjob;
    $rem_jobid = $rjob;

    my $cmd = "prun -i /dev/null -T $res";

    return ( $cmd, $ncpus );
}

###############################################################################
#
# mpirun support.
#
###############################################################################

sub mpirun_get_jobs {
    my $user = shift;

    my @jobs;

    my %mpirun;

    map { $mpirun{$_}++ } split $COMMA, $conf{mpirun};

    foreach my $pid ( get_process_list($user) ) {

        # Works for both solaris and Linux
        my $link = proc_link($pid);
        if ( defined $link ) {
            if ( defined $mpirun{ basename($link) } ) {
                push @jobs, $pid;
                next;
            }
        }

        next if ($running_on_solaris);

        # This test does work on solaris but as it reports a full path
        # rather than just the basename it'll never pass.  It also
        # takes a long time to run so skip it.
        my $name = find_from_status( $pid, 'Name' );
        if ( defined $name and defined $mpirun{$name} ) {
            push @jobs, $pid;
            next;
        }
    }
    return @jobs;
}

sub mpirun_setup_job {
    my ($job) = @_;

    my $gdb = gdb_start();
    if ( not gdb_attach( $gdb, $job ) ) {
        if ( defined $gdb->{error} ) {
            print "$gdb->{error}\n";
        } else {
            print "Failed to attach to process\n";
        }
        return;
    }

    my $nprocs = gdb_read_value( $gdb, 'MPIR_proctable_size' );
    if ( not defined $nprocs ) {
        print "No MPIR_proctable_size symbol found, cannot continue\n";
        return;
    }

    my %pt;

    # Whilst it's possible to dip inside the struct in the process to
    # extract this information some builds don't associate a type with
    # MPIR_proctable which means in those cases this methhod won't work.
    # Instead use a set of hardcoded values for offset and size as defined
    # by the interface and do the maths for finding each element ourselves.

    # I've left the old code here for now as I suspect this is going to be
    # something that causes trouble in the future.

    # If the datatype is readable then verify it's as expected, no
    # issue if we can't read it however, then we just have to trust
    # the resource manager.
    my $proctable_type = gdb_load_type( $gdb, 'MPIR_proctable' );

    if ( defined $proctable_type ) {
        if ( defined $proctable_type->{pid}
            and $proctable_type->{pid} ne 'int' )
        {
            print
"MPIR_proctable.pid is of wrong type: \'$proctable_type->{pid}\'.";
            print "  Attempting to continue anyway...\n";
        }
    }

    if (1) {
        my $word_size = gdb_type_size( $gdb, 'void *' );
        my $table_size = ( $word_size * 2 ) + 4;

        # On 64 bit systems the struct is 20 bytes in size but needs to be
        # 8 byte alligned.
        if ( $word_size == 8 ) {
            $table_size += 4;
        }

        my $host_offset    = 0;
        my $pid_offset     = $word_size * 2;
        my $proctable_addr = gdb_var_addr( $gdb, 'MPIR_proctable' );
        my $proctable      = gdb_read_pointer( $gdb, $proctable_addr );
        my $base           = _hex($proctable);

        foreach my $proc ( 0 .. ( $nprocs - 1 ) ) {

            my $struct_base = $base + ( $table_size * $proc );
            my $hostp = gdb_read_pointer( $gdb, $struct_base + $host_offset );
            my $host = gdb_string( $gdb, 1024, $hostp );

            # Ideally this won't happen but it can and does, for example if
            # the user supplies a hostsfile with localhost specified the
            # resource manager can leave this value unmodified.
            if ( defined $host and $host eq 'localhost' ) {
                $host = hostname();
            }

            my $pid = gdb_read_int( $gdb, $struct_base + $pid_offset );
            if ( defined $host and defined $pid ) {
                $pt{$host}{$proc} = $pid;
            } else {
                print "Failed to extract process info for rank $proc\n";
            }
        }
    } else {

        foreach my $proc ( 0 .. ( $nprocs - 1 ) ) {

            my $hostp = gdb_read_value_addr( $gdb,
                "(void *)MPIR_proctable[$proc].host_name" );
            my $host = gdb_string( $gdb, 1024, $hostp );
            my $pid = gdb_read_value( $gdb, "MPIR_proctable[$proc].pid" );
            if ( defined $host and defined $pid ) {
                $pt{$host}{$proc} = $pid;
            } else {
                print "Failed to extract process info for rank $proc\n";
            }
        }
    }

    gdb_detach($gdb);
    gdb_quit($gdb);

    my @hosts = keys(%pt);

    if ( @hosts == 0 ) {
        print "No process data found\n";
        return;
    }

    my %pcmd;
    $pcmd{nprocesses}   = $nprocs;
    $pcmd{nhosts}       = @hosts;
    $pcmd{process_data} = \%pt;
    @{ $pcmd{host_list} } = @hosts;

    return %pcmd;
}

###############################################################################
#
# Resource manager support.
#
###############################################################################

sub setup_rmgr {
    $conf{rmgr} = shift;

    # Now setup the variable for the rest of the program.
    if ( defined $rmgr{ $conf{rmgr} }{inner_rmgr} ) {
        $cinner{rmgr} = $rmgr{ $conf{rmgr} }{inner_rmgr};
    } else {
        $cinner{rmgr} = $conf{rmgr};
    }
    return;
}

sub find_rmgr {

    # If it's been set on the command line and it's valid then just use
    # what we are given.  Do sanity checks here but only warn on the result
    # to cope with non-default installs.

    if ( defined $conf{rmgr} ) {
        if ( not defined $rmgr{ $conf{rmgr} } ) {
            print "Error, resource manager \"$conf{rmgr}\" not supported\n";
            exit 1;
        }

        if ( defined $rmgr{ $conf{rmgr} }{is_installed}
            and not $rmgr{ $conf{rmgr} }{is_installed}() )
        {
            print
"Warning: Selected resource manager $conf{rmgr} does not appear to be installed\n";
        }
        setup_rmgr( $conf{rmgr} );
        return;
    }

    my @ok;
    foreach my $res ( sort keys %rmgr ) {
        next unless defined $rmgr{$res}{is_installed};
        if ( $rmgr{$res}{is_installed}() ) {
            push @ok, $res;
        }
    }
    if ( @ok != 1 ) {
        print
"Error, multiple resource managers detected, use -Ormgr=<resource manager>\n";
        push @ok, 'local-fd';
        push @ok, 'local';
        print "@ok\n";
        exit 1;
    }

    setup_rmgr( $ok[0] );
    return;
}

# Find any active resource manager, that is --any or --all have been passed
# on the command line so look for any resource manager that have active
# jobs, if there is one active resource manager use that one, if there are
# zero or many exit with an error.
sub find_any_rmgr {
    my ($user) = @_;

    # If it's been set on the command line and it's valid then just use
    # what we are given.  Do sanity checks here but only warn on the result
    # to cope with non-default installs.

    if ( defined $conf{rmgr} ) {
        if ( not defined $rmgr{ $conf{rmgr} } ) {
            print "Error, resource manager \"$conf{rmgr}\" not supported\n";
            exit 1;
        }

        if ( defined $rmgr{ $conf{rmgr} }{is_installed}
            and not $rmgr{ $conf{rmgr} }{is_installed}() )
        {
            print
"Warning: Selected resource manager $conf{rmgr} does not appear to be installed\n";
        }
        setup_rmgr( $conf{rmgr} );
        return;
    }

    my @installed;
    foreach my $res ( sort keys %rmgr ) {
        next unless defined $rmgr{$res}{is_installed};
        if ( $rmgr{$res}{is_installed}() ) {
            push @installed, $res;

        }
    }

    # One resource manager is installed, good.
    if ( @installed == 1 ) {
        setup_rmgr( $installed[0] );
        return;
    }

    # No resource managers are installed, bad.
    if ( @installed == 0 ) {
        print
"Error, multiple resource managers detected, use -Ormgr=<resource manager>\n";
        push @installed, 'local-fd';
        push @installed, 'local';
        print "@installed\n";
        exit 1;
    }

    my @active;
    foreach my $res (@installed) {
        my @jobs = $rmgr{$res}{get_active_jobs}($user);
        if ( @jobs != 0 ) {
            push @active, $res;
        }
    }

    # Only one resource manager has active jobs, let's use it.
    if ( @active == 1 ) {
        setup_rmgr( $active[0] );
        return;
    }

    # Multiple resource managers are installed and have jobs, bounce back to
    # the user to specify which one they want.
    print
"Error, multiple active resource managers detected, use -Ormgr=<resource manager>\n";
    push @installed, 'local-fd';
    push @installed, 'local';
    print "@installed\n";
    exit 1;
}

sub get_all_jobids {
    my $user = shift;
    debug_log( 'rmgr', undef, 'Loading active jobs list' );
    return $rmgr{ $conf{rmgr} }{get_active_jobs}($user);
}

sub job_is_running {
    my ( $job, $user ) = @_;

    if ( defined $rmgr{ $conf{rmgr} }{job_is_running} ) {
        return $rmgr{ $conf{rmgr} }{job_is_running}($job);
    }

    my @jobs = $rmgr{ $conf{rmgr} }{get_active_jobs}($user);
    my %j;
    map { $j{$_} = 1; } @jobs;
    return defined $j{$job};
}

sub job_to_key {
    my $job = shift;

    if ( defined $rmgr{ $conf{rmgr} }{job_to_key} ) {
        return $rmgr{ $conf{rmgr} }{job_to_key}($job);
    }

    return;
}

sub setup_job {
    my $job = shift;

    # If the resource manager provides a setup_pcmd function then use it and
    # simply convert the list it provides into a hash before returning it.
    if ( exists $rmgr{ $conf{rmgr} }{setup_pcmd} ) {
        my ( $cmd, $nprocesses, $nhosts, $pd ) =
          $rmgr{ $conf{rmgr} }{setup_pcmd}($job);
        my %pcmd = (
            command    => $cmd,
            nprocesses => $nprocesses,
            nhosts     => $nhosts,
        );
        if ( defined $pd ) {
            $pcmd{process_data} = $pd;
        }
        return %pcmd;
    }

    # Otherwise call the more flexible setup_job function.
    my %pcmd = $rmgr{ $conf{rmgr} }{setup_job}($job);

# Now we have a either a command capable of launching using the selected resource
# manager, a list of hosts or both.  At this point we can pick the best way to
# launch the job by walkin the list given in the configuration until we find one
# that works.  Note this allows users to prevent the use of the resource manager
# to launch shadow jobs and also to force the use of pdsh.

    my $mode_list = $conf{launch_mode};

# The other three launchers require a host list so in the absence of one force it
# to use rmgr.
    if ( not defined $pcmd{host_list} ) {
        $mode_list = 'rmgr';
    }

    my @modes = split $COMMA, $mode_list;

    my @hosts;
    if ( defined $pcmd{host_list} ) {
        @hosts = @{ $pcmd{host_list} };
    }

    my $have_pdsh  = find_exe('pdsh');
    my $have_clush = find_exe('clush');

    foreach my $mode (@modes) {
        if ( $mode eq 'local' ) {
            my $hc = @hosts;
            my $h  = hostname();
            if ( @hosts == 1 and $hosts[0] eq hostname() ) {
                $pcmd{command} = '';
                return %pcmd;
            }
        } elsif ( $mode eq 'rmgr' ) {
            if ( defined $pcmd{command} ) {
                return %pcmd;
            }
        } elsif ( $mode eq 'ssh' ) {
            if ( @hosts == 1 ) {
                $pcmd{command} = "ssh $hosts[0]";
                return %pcmd;
            }
        } elsif ( $mode eq 'pdsh' ) {
            next unless ($have_pdsh);
            next if ( @hosts == 0 );

            my $fanout = @hosts + 5;

            $pcmd{require_inner_callback} = 1;
            my $hlist = join q{,}, @hosts;
            $pcmd{command} = "pdsh -f $fanout -w $hlist";

            if ( @hosts > 128 ) {
                print "Pdsh backend not recommended for such large jobs\n";
            }

            return %pcmd;

        } elsif ( $mode eq 'clush' ) {
            next unless ($have_clush);
            next if ( @hosts == 0 );

            my $fanout = @hosts + 5;

            $pcmd{require_inner_callback} = 1;
            my $hlist = join q{,}, @hosts;
            $pcmd{command} = "clush -f $fanout --nostdin -w $hlist";

            return %pcmd;

        } else {
            print "Backend invalid: $mode\n";
        }
    }

    print
      "No suitable backend found (perhaps try installing pdsh or clush ?)!\n";
    return;

}

###############################################################################
#
# Output formatting
#
###############################################################################

# This function isn't called but I've kept it for now in case it becomes
# needed when dealing with files.
sub strip_stack_traces {
    my ( $cargs, $lines ) = @_;

    my %above;
    my %below;

    map { $above{$_}++ }
      split $COMMA, $conf{mode_options}{stack}{stack_strip_above};
    map { $below{$_}++ }
      split $COMMA, $conf{mode_options}{stack}{stack_strip_below};

    foreach my $tag ( keys %{$lines} ) {

        # There was a subtle bug here, functions from the @above_list often
        # appear below main which this code doesn't handle all that well.
        my $main_idx;
        my $wait_idx = 0;
        for my $l ( 0 .. $#{ $lines->{$tag} } ) {
            if ( $lines->{$tag}->[$l] =~ /(\w*)\(/ ) {
                if ( defined $below{$1} ) {
                    $main_idx = $l;
                }
                if ( defined $above{$1} ) {
                    if ( defined $main_idx ) {
                        $wait_idx = $l;
                        last;
                    }
                }
            }
        }
        $main_idx = 0 if not defined $main_idx;
        if ( $main_idx != 0 or $wait_idx != 0 ) {
            my $end =
              ( $cargs->{strip_above_wait} and $wait_idx )
              ? $wait_idx
              : $#{ $lines->{$tag} };
            my $start =
              ( $cargs->{strip_below_main} and $main_idx ) ? $main_idx : 0;

            my @new = @{ $lines->{$tag} };
            @new = @new[ $start .. $end ];
            $lines->{$tag} = \@new;
        }
    }
    return;
}

sub sort_proc_hashes {
    my $carg = shift;
    my $key  = shift;
    my @all  = (@_);

    my $numeric = 1;

    foreach my $a (@all) {
        if ( not $a->{$key} =~ m{\A\d+\z} ) {
            $numeric = 0;
            last;
        }
    }

    my @sorted;

    if ($numeric) {
        @sorted = sort { $a->{$key} <=> $b->{$key} } @all;
    } else {
        @sorted = sort { $a->{$key} cmp $b->{$key} } @all;

    }

    if ( $carg->{reverse_sort_order} ) {
        return ( reverse @sorted );
    } else {
        return (@sorted);
    }
}

sub pre_mpi_watch {
    my ($nprocs) = @_;
    my $header = <<'EOF';
u: unexpected messages U: unexpected and other messages
s: sending messages r: receiving messages m: sending and receiving
b: Barrier B: Broadcast g: Gather G: AllGather r: reduce: R: AllReduce
a: alltoall A: alltoalls w: waiting
.: consuming CPU cycles ,: using CPU but no queue data -: sleeping *: error
EOF

    print $header;
    my $l = '0';

    for my $i ( 1 .. $nprocs - 1 ) {
        if ( $i % 10 == 0 ) {
            $l .= substr $i, 0, 1;
        } elsif ( $i % 5 == 0 ) {
            $l .= '5';
        } else {
            $l .= '.';
        }
    }
    print "$l\n";
    return;
}

# Convert back from a set of values (with ranges) in a namespace to a array
# of ranks containing the values.  Assume that each rank only appears in
# the namespace with one value.
sub array_from_target_namespace {
    my ($r) = @_;

    my @all;
    foreach my $value ( keys %{$r} ) {
        while ( defined( my $rank = rng_shift( $r->{$value} ) ) ) {
            $all[$rank] = $value;
        }
    }
    return @all;
}

sub tree_from_namespace {
    my ($r) = @_;

    my %res;

    foreach my $namespace ( keys %{$r} ) {
        foreach my $value ( keys %{ $r->{$namespace} } ) {
            while (
                defined( my $rank = rng_shift( $r->{$namespace}{$value} ) ) )
            {
                $res{$rank}{$namespace} = $value;
            }
        }
    }

    return \%res;
}

sub show_mpi_watch {
    my ( $handle, $lines ) = @_;

    my @all = array_from_target_namespace( $lines->{target_data}{state} );

    my $o = $EMPTY_STRING;
    while ( defined( my $v = shift @all ) ) {
        $o .= $v;
    }
    print "$o\n";
    return;
}

# Nicely format process information.  XXX: proc-sort-key should probably
# sort on column headers as well as keys.  Ideally we'd know what format we
# wanted and only ask the nodes to report relevant info, for now they still
# report everything.
sub show_proc_format {
    my ( $carg, $nlines ) = @_;

    my @proc_format_array;
    my %proc_format_header;
    my $show_fields = 0;

    my %proc_format_lengths;
    my %proc_header_reverse;

    my $separator = $carg->{column_seperator};

    my @columns = split $COMMA, $carg->{proc_format};
    foreach my $column (@columns) {

        $show_fields = 1 if ( $column eq 'fields' );

        my ( $name, $desc ) = split $EQUALS, $column;
        if ( defined $desc ) {
            push @proc_format_array, lc $name;
            $proc_format_header{ lc $name }  = $desc;
            $proc_format_lengths{ lc $name } = length $desc;
            $proc_header_reverse{ lc $desc } = lc $name;
        } else {
            push @proc_format_array, lc $column;
            $proc_format_header{ lc $column }  = $column;
            $proc_format_lengths{ lc $column } = length $column;
        }
    }

    my @all;

    my $lines = tree_from_namespace( $nlines->{target_data} );
    foreach my $tag ( keys %{$lines} ) {
        my %hash;
        $hash{rank} = $tag;
        foreach my $key ( keys %{ $lines->{$tag} } ) {

            my $value = $lines->{$tag}{$key};
            next unless defined $proc_format_lengths{$key} or $show_fields;

            if ( defined $value and length $value > $proc_format_lengths{$key} )
            {
                $proc_format_lengths{$key} = length $value;
            }

            $hash{$key} = $value;

        }
        if ($show_fields) {
            my @fields = sort keys %hash;
            print "@fields\n";
            exit 0;
        }
        push @all, \%hash;
    }

    # Allow sort keys to be based on column names as well as real keys.
    my $key = lc $carg->{proc_sort_key};
    if ( defined $proc_header_reverse{$key} ) {
        $key = $proc_header_reverse{$key};
    }
    @all = sort_proc_hashes( $carg, $key, @all );

    if ( $carg->{proc_show_header} ) {
        my @res;
        foreach my $key (@proc_format_array) {
            my $l .= sprintf "%-$proc_format_lengths{$key}s",
              $proc_format_header{$key};
            push @res, $l;
        }
        my $line = join $separator, @res;
        print "$line\n";
    }
    my $count = $carg->{nprocs_output};
    foreach my $hash (@all) {
        my @res;
        foreach my $key (@proc_format_array) {
            my $value = '??';
            if ( defined $hash->{$key} ) {
                $value = $hash->{$key};
            }
            push @res, sprintf "%$proc_format_lengths{$key}s", $value;
        }
        my $line = join $separator, @res;
        print "$line\n";
        if ( defined $count and ( --$count == 0 ) ) {
            return;
        }
    }
    return;
}

sub show_results_from_file {
    my ( $nlines, $mode ) = @_;

    my $lines = $nlines->{lines};

    if ( defined $allfns{$mode}{out_handler} ) {
        $allfns{$mode}{out_handler}( undef, $nlines );
        return;
    }

    my $of = 'raw';
    $of = 'tree'       if $output_tree;
    $of = 'compress'   if $output_compress;
    $of = 'compress_c' if $output_compress_long;

    if ( $mode eq 'stack' ) {
        if (   $conf{mode_options}{stack}{strip_above_wait}
            or $conf{mode_options}{stack}{strip_below_main} )
        {
            strip_stack_traces( $conf{mode_options}{stack}, $lines );
        }
    }

    complex_output_handler( $of, $lines );

    return;
}

###############################################################################
#
# Data collection (parallel and from file).
#
###############################################################################

sub process_line {
    my ( $line, $lines ) = @_;

    if ( $line =~ m{\A(\d+):(.*)\n}x ) {
        my $tag   = $1;
        my $value = $2;
        push @{ $lines->{lines}{$tag} }, $value;
    } else {
        print "malformed line: $line";
    }
    return;
}

sub default_output_handler {
    my ( $req, $d ) = @_;

    my $cargs = $req->{cargs};

    # Warn on missing output here, be sure to check both standard output and
    # namespace output because if we are using a tree then there won't be any
    # standard output.
    if ( not exists $d->{target_output} and not exists $d->{target_ns_output} )
    {
        return;
    }

    #return unless exists $d->{target_output};

    my $lines = $d->{target_output};
    my $mode  = $req->{mode};

    if ( defined $req->{out_format} ) {
        complex_output_handler( $req->{out_format}, $lines, $d );
    } else {
        my $nprocesses = keys %{ $d->{target_output} };
        foreach my $process ( sort { $a <=> $b } keys %{ $d->{target_output} } )
        {
            foreach my $line ( @{ $d->{target_output}{$process} } ) {
                if ( $nprocesses == 1 ) {
                    print "$line\n";
                } else {
                    print "$process:$line\n";
                }
            }
        }
    }
    return;
}

sub add_tag_to_tree {
    my ( $tree, $tag, $output ) = @_;

    my $line = shift @{$output};

    if ( not defined $tree->{$line}{range} ) {
        $tree->{$line}{range} = rng_create_empty();
        $tree->{$line}{min}   = $tag;
        $tree->{$line}{count} = 0;
    }
    rng_add_value( $tree->{$line}{range}, $tag );
    if ( $tag < $tree->{$line}{min} ) {
        $tree->{$line}{min} = $tag;
    }
    $tree->{$line}{count}++;
    if ( @{$output} > 0 ) {
        add_tag_to_tree( \%{ $tree->{$line}{desc} }, $tag, $output );
    }
    return;
}

# Calculate the formatting needed for displaying a list of variables and
# their types allowing them to be neatly formatted on the screen.  Take an
# array of hashes and combine {name} and {type} into a new value
# {type_name} which is the same length as {type_name} for all other entries
# in the array.
sub _format_local_vars {
    my ($list) = @_;

    my $max = 0;
    foreach my $var ( @{$list} ) {
        my $name_len = length $var->{name};
        my $type_len = length $var->{type};
        if ( $name_len + $type_len > $max ) {
            $max = $name_len + $type_len;
        }
    }

    $max++;

    foreach my $var ( @{$list} ) {
        my $name_len = length $var->{name};
        my $type_len = length $var->{type};
        my $pad      = $max - ( $name_len + $type_len );
        $var->{type_name} = $var->{type} . q{ } x $pad . $var->{name};
    }

    return;
}

# Controls how to display "expanded" variables in stack traces, that is
# where the value display isn't the contents of the variable but some
# mutation of that, either the name of a MPI Communicator, or a pointer
# description or that it's value is too long.
# For text based output <$var> works well IMHO however that isn't valid
# html, for now this just returns $var but perhaps having a
# --format-for-html option would be the way to go in future.
sub pretify_variable {
    my $var = shift;
    return $var;
}

sub _add_data_to_point_on_tree {
    my ( $tree, $d, $l, $max_show, $peer, @vars ) = @_;

    my @all_vars;
    foreach my $var (@vars) {
        my @type_list =
          sort keys %{ $d->{target_data}{"$peer|var_type|$var"} };
        push @all_vars,
          {
            name => $var,
            type => $type_list[0],
          };
    }

    _format_local_vars( \@all_vars );

    foreach my $vref (@all_vars) {
        my $var    = $vref->{name};
        my $key    = "$l|var|$var";
        my @values = keys %{ $d->{target_data}{$key} };

        if ( @values == 1 ) {
            my $line = "  $vref->{type_name} = '$values[0]' "
              . rng_convert_to_user( $d->{target_data}{$key}{ $values[0] } );
            push @{ $tree->{$peer}->{aux} }, $line;
        } elsif ( @values > $max_show ) {
            my $line = "  $vref->{type_name}: "
              . pretify_variable("more than $max_show distinct values");
            push @{ $tree->{$peer}->{aux} }, $line;
        } else {
            push @{ $tree->{$peer}->{aux} }, "  $vref->{type_name}:";
            foreach my $value ( sort @values ) {
                my $line = "      '$value' "
                  . rng_convert_to_user( $d->{target_data}{$key}{$value} );
                push @{ $tree->{$peer}->{aux} }, $line;

            }
        }
    }
    return;
}

sub _add_data_to_tree {
    my ( $tree, $d, $path ) = @_;

    # Sort peers by lowest rank of each branch.
    my @peers =
      sort { $tree->{$a}->{min} <=> $tree->{$b}->{min} } keys %{$tree};

    # This is ugly, dip inside the mode_options for the only mode which
    # sets this value.
    my $max_show = $conf{mode_options}{stack}{max_distinct_values};

    foreach my $peer (@peers) {
        my $l = "$path,$peer";

        if ( defined $d->{target_data}{"$peer|params"} ) {
            my @params_lists =
              sort keys %{ $d->{target_data}{"$peer|params"} };

            # It's not impossible that the same function on the same
            # line might have different params or locals, for example
            # it could be a different binary.  It's probably rare
            # enough that we can ignore it however.
            my @params = split $COMMA, $params_lists[0];

            if ( @params > 0 ) {
                push @{ $tree->{$peer}->{aux} }, "params";
            }

            _add_data_to_point_on_tree( $tree, $d, $l, $max_show, $peer,
                @params );

        }

        if ( defined $d->{target_data}{"$peer|locals"} ) {
            my @locals_lists =
              keys %{ $d->{target_data}{"$peer|locals"} };

            # It's not impossible that the same function on the same
            # line might have different params or locals, for example
            # it could be a different binary.  In the case of locals
            # simply load all of them.
            my @locals = split $COMMA, join( q{,}, @locals_lists );

            if ( @locals > 0 ) {
                push @{ $tree->{$peer}->{aux} }, "locals";
            }

            _add_data_to_point_on_tree( $tree, $d, $l, $max_show, $peer,
                @locals );

        }

        if ( defined $tree->{$peer}->{desc} ) {
            _add_data_to_tree( $tree->{$peer}->{desc}, $d, "$path,$peer" );
        }
    }
    return;
}

sub add_data_to_tree {
    my ( $tree, $ns, $d ) = @_;
    my $prefix = $EMPTY_STRING;
    if ( defined $ns ) {
        $prefix = "$ns|";
    }
    if ( defined $d->{target_data} ) {
        _add_data_to_tree( $tree, $d, $prefix );
    }
    return;
}

sub _display_tree {
    my ( $tree, $parent, $indent, $path, $enforce_spec ) = @_;

    my $ret = $EMPTY_STRING;

    # Sort peers by lowest rank of each branch.
    my @peers =
      sort { $tree->{$a}->{min} <=> $tree->{$b}->{min} } keys %{$tree};

    my $child_enforce_spec = 0;
    foreach my $peer (@peers) {

        my $vpspec = rng_convert_to_user( $tree->{$peer}->{range} );
        if ( @peers != 1 or $parent ne $vpspec or $enforce_spec ) {
            $ret .= "$indent-----------------\n";
            $ret .= "$indent$vpspec ($tree->{$peer}->{count} processes)\n";
            $ret .= "$indent-----------------\n";
        }

        $ret .= "$indent$peer\n";

        if ( defined $tree->{$peer}->{aux} ) {
            $child_enforce_spec = 1;
            foreach my $line ( @{ $tree->{$peer}->{aux} } ) {
                $ret .= "$indent      $line\n";
            }
        }

        if ( defined $tree->{$peer}->{desc} ) {
            $ret .= _display_tree( $tree->{$peer}->{desc},
                $vpspec, "$indent  ", "$path,$peer", $child_enforce_spec );
        }
    }
    return $ret;
}

sub display_tree {
    my ( $tree, ) = @_;
    return _display_tree( $tree, "no-parent", $EMPTY_STRING, $EMPTY_STRING, 1 );
}

# An experimental new tree format.
sub new_tree {
    my ( $lines, $d ) = @_;
    my %tree;
    debug_log( 'tree', $d, 'Making the tree' );
    foreach my $tag ( sort { $a <=> $b } keys %{$lines} ) {
        add_tag_to_tree( \%tree, $tag, $lines->{$tag} );
    }
    debug_log( 'tree', \%tree, 'Enhancing the tree' );
    add_data_to_tree( \%tree, undef, $d );
    debug_log( 'tree', \%tree, 'Formatting the tree' );
    my $t = display_tree( \%tree, );
    debug_log( 'tree', undef, 'Displaying the tree' );
    print $t;
    debug_log( 'tree', undef, 'Done' );
    return;
}

# An experimental new tree format.
sub new_ns_tree {
    my ( $d, $ns ) = @_;

    my %tree;
    debug_log( 'tree', undef, 'Making the tree' );
    my @tags;
    foreach my $tag ( keys %{ $d->{target_ns_output} } ) {
        if ( defined $d->{target_ns_output}->{$tag}->{$ns} ) {
            push @tags, $tag;
        }
    }
    foreach my $tag ( sort { $a <=> $b } @tags ) {
        add_tag_to_tree( \%tree, $tag, $d->{target_ns_output}->{$tag}->{$ns} );
    }
    debug_log( 'tree', \%tree, 'Enhancing the tree' );
    add_data_to_tree( \%tree, $ns, $d );
    debug_log( 'tree', \%tree, 'Formatting the tree' );
    my $t = display_tree( \%tree, );
    debug_log( 'tree', undef, 'Displaying the tree' );
    print "Stack trace(s) for thread: $ns\n";
    print $t;
    debug_log( 'tree', undef, 'Done' );
    return;
}

sub complex_output_handler {
    my ( $output, $lines, $d ) = @_;

    if ( $output eq 'tree' ) {
        if ( not defined $d->{target_ns_output} ) {
            new_tree( $lines, $d );
            return;
        }

        foreach
          my $ns ( sort { $a <=> $b } keys %{ $d->{target_data}{thread_id} } )
        {
            new_ns_tree( $d, $ns );
        }

    } elsif ( $output eq 'compress' ) {

        foreach my $tag ( sort { $a <=> $b } ( keys %{$lines} ) ) {
            next if ( not defined $lines->{$tag} );
            my $rng = rng_create_empty();
            rng_add_value( $rng, $tag );
            foreach my $tag2 ( keys %{$lines} ) {
                next if ( $tag2 eq $tag );
                if ( cmp_list( \@{ $lines->{$tag} }, \@{ $lines->{$tag2} } ) ) {
                    rng_add_value( $rng, $tag2 );
                    delete( $lines->{$tag2} );
                }
            }
            print "----------------\n";
            printf "%s\n", rng_convert_to_user($rng);
            print "----------------\n";
            foreach my $data ( @{ $lines->{$tag} } ) {
                print "$data\n";
            }
        }
    } elsif ( $output eq 'compress_c' ) {
        foreach my $tag ( sort { $a <=> $b } ( keys %{$lines} ) ) {
            print "----------------\n";
            print "$tag\n";
            print "----------------\n";
            foreach my $data ( @{ $lines->{$tag} } ) {
                print "$data\n";
            }
        }
    } else {
        die "Unexpected output mode $output";
    }
    return;
}

sub load_and_display_from_file {
    my $file = shift;
    my $mode = shift;

    #if ( $stats_total or $group ) {
    #    my @data;
    #    open( PCMD, "$file" ) or die "$prog: cant open file $file: $!\n";
    #    local $/ = "\n\n";
    #    while (<PCMD>) {
    #        s/\n//g;
    #        push @data, $_;
    #    }
    #    my $s = read_stats(@data);
    #    show_stats($s);
    #    return;
    #}

    open my $PCMD, '<', "$file" or die "$prog: cant open file $file: $!\n";
    my @data = <$PCMD>;
    close $PCMD;

    my %lines;    # A hash of arrays.

    foreach my $line (@data) {
        process_line( $line, \%lines );
    }
    show_results_from_file( \%lines, $mode );
    return;
}

sub rc_status {
    my $status = shift;
    my %rc;

    $rc{rc}     = $status >> 8;
    $rc{core}   = ( $status & 128 ) >> 7;
    $rc{signal} = $status & 127;

    return %rc;
}

sub maybe_clear_screen {
    return unless $watch;
    if ( $conf{watch_clears_screen} ) {
        printf "%s", " \033[1;1H";
        printf "%s", "\033[2J";
    }
    return;
}

sub connect_to_child {
    my ( $host, $port, $word ) = @_;

    my $socket = IO::Socket::INET->new(
        PeerAddr => $host,
        PeerPort => $port,
        Proto    => 'tcp',
    ) or die "Failed to connect to child ($host:$port)";

    print {$socket} "hello $word\n";

    return $socket;
}

sub my_encode {
    return encode_base64( nfreeze(shift), $EMPTY_STRING );
}

sub my_decode {
    return thaw( decode_base64(shift) );
}

# We have read data on a socket, process it and call any callback.
sub extract_line {
    my ( $handle, $sd ) = @_;

    my $str = $sd->{str};

    # Do this to allow telnet sessions to work.
    $str =~ s/\r//g;

    # Allow multi-line output here, making sure we process each line.
    while ( $str =~ m{\A(.+)\n}x ) {
        $sd->{line_cb}( $handle, $sd, $1 );
        my $len  = length $1;
        my $flen = length $str;
        if ( ( $len + 1 ) != $flen ) {
            $str = substr $str, $len + 1, $flen - $len;
        } else {
            $str = $EMPTY_STRING;

        }
        $sd->{str} = $str;
    }

    return;

}

# A simple "ladder" or 1-wide tree
#sub generate_comm_tree_ladder {
#    my ($a)  = @_;
#    my @b    = @{$a};
#    my $last = 'root';
#    my %comm_tree;
#    foreach my $c (@b) {
#        $comm_tree{$c}{parent} = $last;
#        push @{ $comm_tree{$last}{children} }, $c;
#        $last = $c;
#    }
#
#    return \%comm_tree;
#}

# Fairly simple this, walk through the hosts keeping a list of joints
# (Those able to accept children this iteration) and leaves (those able to
# accept children next iteration) and loop until there are no more hosts
# left to add.
sub generate_binary_tree {
    my ( $a, $width ) = @_;
    my @b = @{$a};
    my %comm_tree;

    my @leaves;

    my $root = shift @{$a};

    my @joints;
    push @joints, $root;

    $comm_tree{root}{children}[0] = $root;

    while ( @{$a} ) {
        foreach my $joint (@joints) {
            my @children = splice @{$a}, 0, $width;
            if ( @children > 0 ) {
                push @leaves, @children;
                @{ $comm_tree{$joint}{children} } = @children;
            }
        }
        @joints = @leaves;
        @leaves = ();
    }

    return \%comm_tree;
}

# For each remote process generate a tree, giving each process a parent and
# a number of children.  Currently just make this a simple "ladder" but
# should probably be a f-nomial tree.
sub generate_comm_tree {
    my ($a) = @_;

    return generate_binary_tree( $a, $conf{tree_width} );
}

# Called once when we have the socket details of the last child.
sub connect_to_children {
    my $comm_data = shift;

    debug_log( 'signon', undef, 'Received last signon, connecting to inner' );

    @{ $comm_data->{host_ids} } = sort keys %{ $comm_data->{remote} };
    $comm_data->{connection_tree} =
      generate_comm_tree( $comm_data->{host_ids} );

    my $td = $comm_data->{connection_tree}->{root}{children}[0];

    debug_log( 'ctree', $comm_data->{connection_tree}, 'connection tree' );

    my $cdata;
    $cdata->{socket} = connect_to_child(
        $td,
        $comm_data->{remote}{$td}{port},
        $comm_data->{remote}{$td}{key}
    );
    $cdata->{active}   = 1;
    $cdata->{str}      = $EMPTY_STRING;
    $cdata->{fd_desc}  = 'child socket';
    $cdata->{line_cb}  = \&command_from_inner;
    $cdata->{eof_cb}   = \&eof_from_fd;
    $cdata->{event_cb} = \&handle_event_from_socket;

    $comm_data->{sockets}{ $cdata->{socket} } = $cdata;
    $comm_data->{sel}->add( $cdata->{socket} );
    return;
}

sub issue_command_to_inner {
    my ( $cdata, $cmd ) = @_;
    my $str = my_encode($cmd);
    debug_log( 'full_duplex', $cmd, 'Sending command to inner, %d bytes',
        length $str );
    $cdata->{socket}->print("$str\n");
    return;
}

sub first_command {
    my $comm_data = shift;

    my $req;
    $req->{mode}            = 'signon';
    $req->{connection_tree} = $comm_data->{connection_tree};
    $req->{remote}          = $comm_data->{remote};

    # Also send over some of the per-run (as opposed to per-mode)
    # configuration options.
    # XXX: Need to send over scripts and other stuff here as well.

    if ( defined $comm_data->{pd} ) {
        $req->{pd} = $comm_data->{pd};
    }

    $req->{cinner} = \%cinner;
    $req->{cinner}{jobid} = $comm_data->{jobid};

    return $req;
}

my @commands;

# Push a command onto the list of commands to be executed.
sub push_command {
    my ( $mode, $out_format, $args ) = @_;

    my %cmd;
    $cmd{mode}       = $mode;
    $cmd{out_format} = $out_format if defined $out_format;
    $cmd{args}       = $args if defined $args;
    push @commands, \%cmd;
    return;
}

sub last_command {
    my $req;
    $req->{mode} = 'exit';
    return $req;
}

sub next_command {
    my $comm_data = shift;

    if ( @commands == 0 ) {
        return last_command();
    }

    my $cmd;
    my $req;

    if ($watch) {
        $cmd = $commands[0];
        $req->{detach_after_callback} = 1;
    } else {
        $cmd = shift @commands;
    }

    $req->{mode} = $cmd->{mode};

    if ( defined $cmd->{args} ) {
        $req->{cargs} = $cmd->{args};
    }

    # XXX: Should only send this list over if it makes sense, for example
    # the deadlock code only works when targeting all ranks.
    if ( defined $rank_rng ) {
        $req->{ranks} = $rank_rng;
    }

    if ( defined $cmd->{out_format} ) {
        $req->{out_format} = $cmd->{out_format};
    }

    if ( $conf{verbose} and defined $req->{cargs} ) {
        print "Mode '$req->{mode}' mode specific options:\n";
        foreach my $arg ( sort keys %{ $req->{cargs} } ) {
            if ( defined $req->{cargs}{$arg} ) {
                printf "%20s : '%s'\n", $arg, $req->{cargs}{$arg};
            } else {
                printf "%20s : undef\n", $arg;
            }
        }
    }

    return $req;
}

sub report_failed_signon {
    my ( $key, $data ) = @_;
    my $length = length $key;
    print "$key : ranks\n";
    foreach my $value ( sort keys %{$data} ) {
        printf "%$length" . "s : %s\n", $value,
          rng_convert_to_user( $data->{$value} );
    }
    return;
}

# Check all processes are detected and report an error to the user.  Return
# true if there is no processes are detected.
sub check_signon {
    my ( $comm_data, $data ) = @_;
    return if ( $conf{check_signon} eq 'none' );

    if ( not defined $data->{target_data}{FOUND}{yes}
        or rng_empty( $data->{target_data}{FOUND}{yes} ) )
    {
        print "Warning, failed to locate any ranks\n";
        return 1;
    }

    my %here;
    while (
        defined( my $proc = rng_shift( $data->{target_data}{FOUND}{yes} ) ) )
    {
        $here{$proc} = 1;
    }
    my $rng = rng_create_empty();

    foreach my $proc ( 0 .. $comm_data->{nprocesses} - 1 ) {
        if ( not defined $here{$proc} ) {
            rng_add_value( $rng, $proc );
        }
    }

    if ( not rng_empty($rng) ) {
        printf "Warning, failed to locate ranks %s\n",
          rng_convert_to_user($rng);
    }

    return if ( $conf{check_signon} eq 'missing' );

    if ( keys %{ $data->{target_data}{NAME} } != 1 ) {
        print "Warning, remote process name differs across ranks\n";
        report_failed_signon( 'name', \%{ $data->{target_data}{NAME} } );
    }

    if ( keys %{ $data->{target_data}{STATE} } != 1 ) {
        print "Warning, remote process state differs across ranks\n";
        report_failed_signon( 'state', \%{ $data->{target_data}{STATE} } );
    }
    return;
}

my $header_shown = 0;

sub maybe_show_header {
    my ($comm_data) = @_;
    return if ($header_shown);
    my $mode = $comm_data->{current_req}{mode};

    if ( defined $allfns{$mode}{pre_out_handler} ) {
        $allfns{$mode}{pre_out_handler}( $comm_data->{nprocesses} );
    }
    $header_shown = 1;
    return;
}

sub format_target_data {
    my ($td) = @_;

    my $ret = "\n";
    foreach my $name ( sort keys %{$td} ) {
        $ret .= "Namespace: \"$name\"\n";
        foreach my $value ( sort keys %{ $td->{$name} } ) {
            $ret .= "    $value\t";
            $ret .= rng_convert_to_user( $td->{$name}{$value} ) . "\n";
        }
    }
    return $ret;
}

sub command_from_inner {
    my ( $comm_data, $cdata, $line ) = @_;

    # Initial signon from child.
    if ( $line eq 'Welcome' ) {
        my $req = first_command($comm_data);
        $comm_data->{current_req} = $req;
        issue_command_to_inner( $cdata, $req );
        return;
    }

    # A reply from inner.
    my $d = my_decode($line);

    debug_log( 'full_duplex', $d, 'Reply from inner, %d bytes', length $line );

    # The inner process has signed on.
    if ( $comm_data->{current_req}->{mode} eq 'signon' ) {

        # Allow the find_pids function to report back a different job
        # size to the one the resource manager spotted, potentially
        # because there is a job running under an allocation and there
        # may be a discrepancy between the two.
        if ( defined $d->{target_data}{JOB_SIZE} ) {
            my @size = keys %{ $d->{target_data}{JOB_SIZE} };
            if ( @size == 1 ) {
                $comm_data->{nprocesses} = $size[0];
            } else {
                print
                  "More than one value reported for Job Size, using largest\n";
                my @s = sort { $a <=> $b } @size;
                $comm_data->{nprocesses} = $s[-1];
            }
        }

        # Check the signon messages, reporting minor errors to the user, if
        # no processes are found then don't bother processing any commands
        # but just tell the inner to exit.
        my $error = check_signon( $comm_data, $d );
        if ($error) {
            $comm_data->{current_req} = last_command();
        } else {
            $comm_data->{current_req} = next_command($comm_data);
        }

        issue_command_to_inner( $cdata, $comm_data->{current_req} );
        $comm_data->{state} = 'live';
        return;
    }

    # The inner process is about to exit.
    if ( $comm_data->{current_req}->{mode} eq 'exit' ) {
        $comm_data->{state} = 'shutdown';
        return;
    }

    # Check for all processes being found, or rather check for none being
    # found.  If there are none then we probably don't have any info from
    # the so tell the inner to quit and don't process any data we have just
    # received.
    if ( not defined $d->{target_data}{FOUND}{yes} ) {
        print "No remaining processes, is job dead?\n";
        $comm_data->{current_req} = last_command();
        issue_command_to_inner( $cdata, $comm_data->{current_req} );
        return;
    }

    # We have received a reply to a request, send the next request first
    # and then display this reply.  If in watch mode display the reply,
    # sleep and then send the next request.
    my $req = next_command($comm_data);
    if ( not $watch ) {
        issue_command_to_inner( $cdata, $req );
    }

    if ( defined $d->{target_data} ) {
        debug_log(
            'tdata', $d->{target_data},
            'Target data %s',
            format_target_data( $d->{target_data} )
        );
    }

    if ( defined $d->{target_output} ) {
        debug_log( 'tdata', $d->{target_output}, 'Target output' );
    }

    if ( defined $d->{target_ns_output} ) {
        debug_log( 'tdata', $d->{target_ns_output}, 'Target namespace output' );
    }

    maybe_clear_screen();
    maybe_show_header($comm_data);

    # Mode here is the mode for the reply we just got, this may not be the
    # same thing as the request we are currently sending.
    my $mode = $comm_data->{current_req}->{mode};

    if ( defined $d->{target_data}{ERROR} ) {
        print "Warning: errors reported by some ranks\n========\n";
        foreach my $error ( sort keys %{ $d->{target_data}{ERROR} } ) {
            printf "%s: %s\n",
              rng_convert_to_user( $d->{target_data}{ERROR}{$error} ), $error;
        }
        print "========\n";
    }

    if ( defined $allfns{$mode}{out_handler} ) {
        $allfns{$mode}{out_handler}( $conf{mode_options}{$mode}, $d,
            $comm_data->{current_req} );
    } else {
        default_output_handler( $comm_data->{current_req}, $d );
    }

    $comm_data->{current_req} = $req;

    if ($watch) {
        sleep $conf{interval};
        issue_command_to_inner( $cdata, $req );
    }

    return;
}

sub handle_signon {
    my ( $comm_data, $host, $port, $key ) = @_;

    $comm_data->{remote}{$host}{port} = $port;
    $comm_data->{remote}{$host}{key}  = $key;
    $comm_data->{signons}++;

    if ( $comm_data->{signons} == $comm_data->{nhosts} ) {
        connect_to_children($comm_data);
    }
    return;
}

sub hello_from_inner {
    my ( $comm_data, $cdata, $line ) = @_;

    # Children connect back with "Hello $outerkey $hostname $port $innernkey";
    my @words = split $SPACE, $line;
    if ( @words != 5 or $words[0] ne 'Hello' or $words[1] ne $secret ) {
        print "Bad signon $line\n";
        return 0;
    }

    handle_signon( $comm_data, $words[2], $words[3], $words[4] );

    if ( $comm_data->{signons} == $comm_data->{nhosts} ) {

        # Don't listen on this port any more;
        $comm_data->{sel}->remove( $comm_data->{listen} );
        $comm_data->{listen}->close();

    }
    return;
}

sub inner_stdout_cb {
    my ( $comm_data, $cdata, $line ) = @_;
    my @words = split $SPACE, $line;
    if ( @words == 4 and $words[0] eq 'connect' ) {

        handle_signon( $comm_data, $words[1], $words[2], $words[3] );
        return;
    } elsif ( $words[0] eq 'debug' ) {
        my $count = $comm_data->{sel}->count();
        print "There are $count sockets\n";
        return;
    }
    print "inner: $line\n";
    return;
}

sub inner_stderr_cb {
    my ( $comm_data, $cdata, $line ) = @_;
    print "einner: $line\n";
    return;
}

sub eof_from_fd {
    my ( $comm_data, $cdata ) = @_;

    if ( $comm_data->{state} ne 'shutdown' ) {
        print "Unexpected EOF from $cdata->{fd_desc} ($comm_data->{state})\n";
    }

    #print("Expected EOF from $cdata->{fd_desc} ($comm_data->{state})\n");
    return;
}

sub handle_event_from_socket {
    my ( $comm_data, $h ) = @_;
    my $cdata = $comm_data->{sockets}{$h};

    my $data;
    my $nb = sysread $h, $data, 65536;

    if ( $nb == 0 ) {
        if ( defined $cdata->{eof_cb} ) {
            $cdata->{eof_cb}( $comm_data, $cdata );
        }
        $comm_data->{sel}->remove($h);
        $h->close();
    } else {
        $cdata->{str} .= $data;
        extract_line( $comm_data, $cdata );
    }
    return;
}

sub handle_event_from_port {
    my ( $comm_data, $h ) = @_;

    my $new = $h->accept();
    $comm_data->{sel}->add($new);
    my %cdata;
    $cdata{str}                 = $EMPTY_STRING;
    $cdata{line_cb}             = \&hello_from_inner;
    $cdata{event_cb}            = \&handle_event_from_socket;
    $comm_data->{sockets}{$new} = \%cdata;
    return;
}

###############################################################################
#
# Range mapping functions.
#
###############################################################################

# A common set of functions for dealing with (integer based) ranges.

# Internally a array format is used for speed, functions exist to convert
# from the normal list format "[0-12,15,16]" to the internal one and back
# again.

# rng_convert_from_user($userrange)
# rc_convert_to_user($range)
#   Convert to and from the normal type to the internal type.

# rng_shift($range)
#   Pop the lowest value off the range.

# rng_add_value($range,$value)
#   Add a value to the range.

# rng_merge($range,$new)
#   Merge two ranges.

# rng_dup($range)
#   Duplicate a range

# rng_create_from_array(@array)
#   Create a range from an array

# rng_create_empty
#   Create a empty range

# rng_empty
#   Test for emptyness.

# Potentially needed but not implemented yet

# rng_user_verify()
# is_value_in_range()
# nvalues_in_range()  - Return the number of values in a range.
# rng_min()           - Return the minimum value in a range.
# rng_common()        - Take two ranges and return the common values.
# rng_find_missing()
#   Take two ranges and return all that are in the first but not in the
#   second. (see check_signon).

# Convert from a user range to a internal one.
sub rng_convert_from_user {
    my ($range) = @_;

    return unless defined $range;
    return if $range eq $EMPTY_STRING;
    return if $range eq "[]";

    my $newrange;

    if ( $range =~ m{\A\[([\d\-\,]+)\]\z}x ) {
        $newrange = $1;
    } elsif ( $range =~ m{\A(\d+)\z}x ) {
        $newrange = $1;
    } else {
        confess("Failed to recognise $range as range\n");
    }

    my @user_parts = split $COMMA, $newrange;

    my @parts;

    foreach my $part (@user_parts) {
        my %part;
        if ( $part =~ m{\A(\d+)\z}x ) {
            $part{l} = $1;
            $part{u} = $1;
        } elsif ( $part =~ m{\A(\d+)-(\d+)\z}x ) {
            $part{l} = $1;
            $part{u} = $2;
        } else {
            confess("Failed to recognise $part as range\n");
        }
        push @parts, \%part;
    }
    return \@parts;
}

sub rng_convert_to_user {
    my ($rg) = @_;

    my $range = join q{,},
      map { $_->{l} == $_->{u} ? $_->{l} : $_->{l} . q{-} . $_->{u} } @{$rg};
    return "[$range]";
}

sub rng_shift {
    my ($rg) = @_;

    # Return undef if this range is empty.
    return if ( @{$rg} == 0 );

    my $value = $rg->[0]->{l};
    if ( $rg->[0]->{l} == $rg->[0]->{u} ) {
        shift @{$rg};
    } else {
        $rg->[0]->{l}++;
    }
    return $value;
}

# Note the performance of this function is much higher when adding values
# at the top of the range than at the start, presumably it's easier to make
# an array longer than it is to unshift something onto the start.  Quietly
# return if the value is already in the range.
sub rng_add_value {
    my ( $rg, $value ) = @_;

    if ( ref( $rg->[0] ) eq $EMPTY_STRING ) {
        push @{$rg}, { l => $value, u => $value };
        return;
    }

    # If it's after the last value then just add it.
    {
        my $lu = $rg->[-1]->{u};
        if ( $value > $lu ) {

            if ( $value == $lu + 1 ) {
                $rg->[-1]->{u}++;
                return;
            }
            push @{$rg}, { l => $value, u => $value };
            return;
        }
    }

    my $idx = 0;
    foreach my $part ( @{$rg} ) {

        my $l = $part->{l};
        my $u = $part->{u};

        if ( $value < $l ) {

            # Extend the current entry downwards.
            if ( $value == $l - 1 ) {
                $part->{l}--;
                return;
            }

            # If it's before the current entry then insert it.
            splice @{$rg}, $idx, 0, { l => $value, u => $value };
            return;
        } elsif ( $value == $u + 1 ) {

            # If we meet the subsequent entry then merge the two.  No need
            # to check there is a subsequent entry here as that case has
            # already been dealt with above.
            if ( $u + 2 == $rg->[ $idx + 1 ]->{l} ) {
                $part->{u} = $rg->[ $idx + 1 ]->{u};
                splice @{$rg}, $idx + 1, 1;
                return;
            }

            # Extend the current entry upwards.
            $part->{u}++;

            return;
        } elsif ( $value >= $l and $value <= $u ) {

            # Already in range.
            return;
        }
        $idx++;
    }
    confess('Failed to add value to range');
}

sub rng_merge {
    my ( $rg, $new ) = @_;

    # Need to use defined here as zero is a valid value to store in a
    # range.
    while ( defined( my $val = rng_shift($new) ) ) {
        rng_add_value( $rg, $val );
    }
    return;
}

sub rng_dup {
    my ($rg) = @_;
    return dclone($rg);
}

sub rng_create_from_array {
    my (@r) = @_;

    my $rng = rng_create_empty();

    # Sort the array into numerical order so that the add_value calls can
    # be fast.

    foreach my $v ( sort { $a <=> $b } @r ) {
        rng_add_value( $rng, $v );
    }
    return $rng;
}

sub rng_create_empty {
    my @r;
    return \@r;
}

sub rng_empty {
    my ($rg) = @_;

    return ( ref( $rg->[0] ) eq $EMPTY_STRING );
}

sub create_local_port {
    my ($range) = @_;

    my %options = (
        Reuse  => 1,
        Proto  => 'tcp',
        Listen => 2,
    );

    if ( not defined $range ) {
        my $sl = IO::Socket::INET->new(%options)
          or confess("Failed to create local port: $!");
        return $sl;
    }

    my $rg = rng_convert_from_user($range);

    while ( my $port = rng_shift($rg) ) {
        $options{LocalPort} = $port;
        my $sl = IO::Socket::INET->new(%options);
        return $sl if defined $sl;
    }

    die "Failed to create local port, no free ports in range \"$range\"\n";
}

sub go_parallel {
    my ( $jobid, $cmd, $nprocesses, $nhosts, $pd ) = @_;

    my $comm_data;

    my $sel = IO::Select->new();
    if ( $conf{inner_callback} ) {
        my $sl = create_local_port( $conf{port_range} );

        $comm_data->{listen} = $sl;
        my $port     = $sl->sockport();
        my $hostname = hostname();
        config_set_internal( 'outer', "$hostname:$port" );
        $sel->add($sl);

        my %cdata;
        $cdata{event_cb} = \&handle_event_from_port;
        $comm_data->{sockets}{$sl} = \%cdata;
    }

    if ( defined $pd ) {
        debug_log( 'verbose', $pd,
            'Remote process data available on frontend' );
        $comm_data->{pd} = $pd;
    }

    map { $cmd .= " --$_=\"$cinner_cmd{$_}\"" } keys %cinner_cmd;

    debug_log( 'show_cmd', undef, $cmd );

    my $pcmd = {
        pid => -1,
        out => *OUT,
        err => *ERR,
    };

    $pcmd->{pid} = open3( $pcmd->{in}, *OUT, *ERR, $cmd )
      or confess "Unable to open3() pcmd: $!\n";

    close $pcmd->{in};

    $comm_data->{nhosts}     = $nhosts;
    $comm_data->{nprocesses} = $nprocesses;
    $comm_data->{cmd}        = $cmd;
    $comm_data->{jobid}      = $jobid;
    $comm_data->{signons}    = 0;

    # State, one of "connecting" "live" and "shutdown";
    $comm_data->{state} = 'connecting';

    $sel->add( $pcmd->{out} );
    $sel->add( $pcmd->{err} );

    $comm_data->{sel} = $sel;
    my $start = time;

    my %op;
    $op{str}                              = $EMPTY_STRING;
    $op{line_cb}                          = \&inner_stdout_cb;
    $op{eof_cb}                           = \&eof_from_fd;
    $op{fd_desc}                          = 'Inner stdout';
    $op{event_cb}                         = \&handle_event_from_socket;
    $comm_data->{sockets}{ $pcmd->{out} } = \%op;

    my %ep;
    $ep{str}                              = $EMPTY_STRING;
    $ep{line_cb}                          = \&inner_stderr_cb;
    $ep{eof_cb}                           = \&eof_from_fd;
    $ep{fd_desc}                          = 'Inner stderr';
    $ep{event_cb}                         = \&handle_event_from_socket;
    $comm_data->{sockets}{ $pcmd->{err} } = \%ep;

    while ( $sel->count() > 1 ) {
        while ( my @live = $sel->can_read(5) ) {
            foreach my $h (@live) {
                if ( defined $comm_data->{sockets}{$h} ) {
                    my $cdata = $comm_data->{sockets}{$h};
                    $cdata->{event_cb}( $comm_data, $h );
                } else {
                    print "Responce from unknown fd $h\n";
                    exit 1;
                }
            }
        }
        my $t2    = time - $start;
        my $count = $sel->count();
        if ( $count > 0 ) {

            if ( $comm_data->{signons} != $comm_data->{nhosts} ) {
                my $missing = $comm_data->{nhosts} - $comm_data->{signons};
                print "Waiting for signon from $missing hosts.\n";
            }
        }
    }

    waitpid $pcmd->{pid}, 0;
    my $res = $?;

    if ( $comm_data->{state} ne 'shutdown' ) {
        print
          "Unexpected exit from parallel command (state=$comm_data->{state})\n";
    }
    print "result from parallel command is $res (state=$comm_data->{state})\n"
      if ( $conf{verbose} );

    if ( $res != 0 ) {
        my %status = rc_status($res);
        if ( job_is_running( $jobid, $target_user ) ) {
            print
              "Bad exit code from parallel command (exit_code=$status{rc})\n";
        } else {
            print "Job $jobid is no longer active\n";
            return 1;
        }
    }

    return 0;
}

sub create_padb_secret {
    my $filename = "$ENV{HOME}/.padb-secret";
    my $FD;
    if ( not open $FD, '>', $filename ) {
        print "Failed to create secret file: $!\n";
        return;
    }
    if ( chmod( 0600, $filename ) != 1 ) {
        print "Failed to chmod secret file: $!\n";
        return;
    }
    my $s = rand;
    print {$FD} "secret=$s\n";
    close $FD;
    print "Sucessfully created secret file ($filename)\n";
    return;
}

sub find_padb_secret {

    my $file = "$ENV{HOME}/.padb-secret";
    if ( !-f $file ) {
        print "No secret file ($file)\n";
        return;
    }
    my (
        $dev,  $ino,   $mode,  $nlink, $uid,     $gid, $rdev,
        $size, $atime, $mtime, $ctime, $blksize, $blocks
    ) = stat $file;

    # Check that the file is mode 100600 (Octal)
    if ( $mode != 33152 ) {
        print "Wrong permissions on secret file, should be 0600 ($file)\n";
        exit 1;
    }

    my @l = slurp_file($file);
    if ( @l != 1 ) {
        return;
    }
    if ( $l[0] =~ m{\Asecret=([\w\d\.]+)\Z}x ) {
        return $1;
    }
    print "Failed to load secret from file ($file)\n";
    exit 1;
}

# Find the path to use for padb, previously we just used $0 here however
# when using pdsh to find jobs $PWD isn't preserved between the inner and
# the outer so we have to search out the real location of padb.  It would
# be possible to do this on request, that is only for pdsh rather than
# globally however there should be no down-side to always executing this.
# Based on a patch from Thipadin Seng-Long @ Bull.

sub padb_path {
    my $pwd   = $ENV{PWD};
    my $dirnm = dirname($0);

    # if padb is launch as padb then dirname is .
    # if padb is launched with a full path then dir is full
    # if padb is launched as ../padb then dir is ..
    # if padb is launched as Dir/padb then dir is Dir
    my $out;
    if ( $dirnm eq "." ) {    # started in current dir
        my $base = basename($0);
        return "$pwd\/$base";
    } elsif ( $dirnm =~ m{\A/}x ) {    # started as full path or path known
        return $0;
    }

    return "$pwd\/$0";
}

sub go_job {
    my $jobid = shift;

    $conf{verbose} && print "Attaching to job $jobid\n";

    $rem_jobid = $jobid;

    # Setup whatever is needed for running parallel commands, note this
    # might involve setting environment variables.
    my %pcmd = setup_job($jobid);

    debug_log( 'pcmd', \%pcmd, 'Loaded pcmd data' );

    my $cmd    = $pcmd{command};
    my $ncpus  = $pcmd{nprocesses};
    my $nhosts = $pcmd{nhosts};
    my $pd     = $pcmd{process_data};

    if ( defined $rmgr{ $conf{rmgr} }{require_inner_callback}
        and $rmgr{ $conf{rmgr} }{require_inner_callback} )
    {
        $conf{inner_callback} = 1;
    }

    if ( defined $pcmd{require_inner_callback} ) {
        $conf{inner_callback} = $pcmd{require_inner_callback};
    }

    $conf{verbose} && defined $ncpus  && print "Job has $ncpus process(es)\n";
    $conf{verbose} && defined $nhosts && print "Job spans $nhosts host(s)\n";

    debug_log( 'verbose', undef, 'There are %d processes over %d hosts',
        $ncpus, $nhosts );

    $cmd .= " " . padb_path() . " --inner";

    if ( $conf{inner_callback} ) {
        $secret = find_padb_secret();

        if ( not defined $secret ) {
            print "Error: Could not load secret file on this node\n";
            print "Use --create-secret-file to create one\n";
            exit 1;
        }

    }

    if ( not defined $nhosts ) {
        print "Fatal problem setting up the resource manager: $conf{rmgr}\n";
        return 1;
    }
    my $errors = go_parallel( $jobid, $cmd, $ncpus, $nhosts, $pd );

    debug_log( 'verbose', undef, 'Completed command' );

    if ( defined $pcmd{cleanup_cb} ) {
        $pcmd{cleanup_cb}( $pcmd{cleanup_handle} );
    }

    return $errors;
}

###############################################################################
#
# Outer main
#
###############################################################################

sub cmdline_error {
    my $str = shift;
    print {*STDERR} $str;
    exit 1;
}

sub config_init {
    map { $ic_names{$_}++ } @inner_conf;
    map { $ic_names_cmd{$_}++ } @inner_conf_cmd;
    return;
}

sub config_set_internal {
    my ( $key, $value ) = @_;
    if ( exists $conf{$key} ) {
        $conf{$key} = $value;
    } else {
        foreach my $mode ( keys %{ $conf{mode_options_reverse}{$key} } ) {
            $conf{mode_options}{$mode}{$key} = $value;
        }
    }

    # Mark this variable to be passed onto the inner processes.
    if ( defined $ic_names{$key} ) {
        $cinner{$key} = $value;
    }

    if ( defined $ic_names_cmd{$key} ) {
        $cinner_cmd{$key} = $value;
    }
    return;
}

sub config_set {
    my ( $key, $value ) = @_;
    print "Setting '$key' to '$value'\n" if ( $conf{verbose} );

    if (    not exists $conf{$key}
        and not exists $conf{mode_options_reverse}{$key} )
    {
        print {*STDERR}
          "Warning, unknown config option '$key' value '$value'.\n";
    }

    config_set_internal( $key, $value );
    return;
}

sub config_from_file {
    my $file = shift;

    print "Loading config from \"$file\"\n" if ( $conf{verbose} );

    foreach my $line ( slurp_file($file) ) {
        if (
            $line =~ m{\A       # Beginning of line.
                        ([\w-]+) # Key.
                        \s*\=\s* # Assignment.
                        (\S+)    # Value.
                        \s*      # Optional whitespace
                        \Z}x
          )
        {

            my $key   = $1;
            my $value = $2;
            $key =~ s{-}{_}gx;
            config_set( $key, $value );
        }
    }

    return;
}

sub config_from_env {
    print "Loading config from environment\n" if ( $conf{verbose} );

    foreach my $key ( keys %conf ) {
        my $name = 'PADB_' . uc $key;
        if ( defined $ENV{$name} ) {
            config_set( $key, $ENV{$name} );
        }
    }

    foreach my $key ( keys %{ $conf{mode_options_reverse} } ) {
        my $name = 'PADB_' . uc $key;
        if ( defined $ENV{$name} ) {
            config_set( $key, $ENV{$name} );
        }
    }
    return;
}

sub _config_help {
    my ($mode) = @_;

    print "\nOptions for mode '$allfns{$mode}{arg_long}'\n";

    my $max_len = 0;

    foreach my $key ( sort keys %{ $conf{mode_options}{$mode} } ) {
        if ( length $key > $max_len ) {
            $max_len = length($key);
        }
    }

    foreach my $key ( sort keys %{ $conf{mode_options}{$mode} } ) {
        my $name = $key;
        $name =~ s{_}{-}gx;
        if ( defined $conf{mode_options}{$mode}{$key} ) {
            printf
              " %$max_len" . "s = '%s'\n",
              $name, $conf{mode_options}{$mode}{$key};
        } else {
            printf " %$max_len" . "s = undef\n", $name;
        }
    }
    return;
}

sub config_help {
    my ($mode) = @_;
    print "Current options are:\n";

    my $max_len = 0;

    foreach my $key ( keys %conf ) {
        next if ( ref( $conf{$key} ) eq 'HASH' );
        if ( length $key > $max_len ) {
            $max_len = length $key;
        }
    }

    foreach my $key ( sort keys %conf ) {
        next if ( ref( $conf{$key} ) eq 'HASH' );
        my $name = $key;
        $name =~ s{_}{-}gx;
        if ( defined $conf{$key} ) {
            printf " %$max_len" . "s = '$conf{$key}'\n", $name;
        } else {
            printf " %$max_len" . "s = unset\n", $name;
        }
    }

    if ( defined $mode ) {
        _config_help($mode);
    } else {
        foreach ( sort keys %{ $conf{mode_options} } ) {
            _config_help($_);
        }
    }
    return;
}

sub outer_main {

    my $mode = parse_args_outer();

    my $user = $target_user;

    if ( getpwnam $user eq $EMPTY_STRING ) {
        print {*STDERR} "$prog: Error: no such user as '$user'\n";
        exit 1;
    }

  # Load from the config files first, then the env and finally the command line.

    config_init();

    config_from_file($configfile);

    config_from_file("$ENV{HOME}/.padbrc") unless ( $norc == 1 );

    config_from_env();

    print "Loading config from command line\n" if ( $conf{verbose} );

    # Once again there is a 'bugette' here, you cant pass the first of
    # these strings through due to the split hacking off everything to the
    # right of the second equals sign however you can do the second.
    # -Oedbopt="--pagesize=8192 --pagesize-header=4096"
    # -Oedbopt="--pagesize 8192 --pagesize-header 4096"
    foreach my $key ( keys %config_options ) {

        my $val = $config_options{$key};

        my $name = $key;

        $key =~ s{-}{_}gx;

        if (    not exists $conf{$key}
            and not exists $conf{mode_options_reverse}{$key} )
        {
            print "Error, unknown config option '$name'\n";
            config_help($mode);
            exit 1;
        }
        config_set( $key, $val );
    }

    foreach my $co (@conf_bool) {
        config_set_internal( $co, check_and_convert_bool( $conf{$co} ) );
    }

    foreach my $co (@conf_time) {
        config_set_internal( $co, check_and_convert_time( $conf{$co} ) );
    }

    foreach my $co (@conf_int) {

        # Only check for defined values here, for some options only
        # intergers are valid but the default value is undef which means
        # padb should attempt to do the right thing.
        if ( defined $conf{$co} ) {
            check_int( $conf{$co} );
        }
    }

    # Now go through all the config options and both verify they are
    # acceptable and convert true/false strings to 1/0 values.
    foreach my $key ( keys %{ $conf{mode_options_reverse} } ) {
        foreach my $mode ( keys %{ $conf{mode_options_reverse}{$key} } ) {
            if ( defined $conf{options_verify}{$mode}{$key} ) {
                $conf{mode_options}{$mode}{$key} =
                  $conf{options_verify}{$mode}{$key}(
                    $conf{mode_options}{$mode}{$key} );
            }
        }
    }

    debug_log( 'config', \%conf, 'Finished setting configuration options' );

    if ($create_secret) {
        create_padb_secret();
        exit 0;
    }

    if ($list_rmgrs) {
        foreach my $res ( sort keys %rmgr ) {

            if ( defined $rmgr{$res}{is_installed}
                and not $rmgr{$res}{is_installed}() )
            {
                print "$res: Not detected on system.\n";
                next;
            }

            print "$res: ";
            my @jobs = $rmgr{$res}{get_active_jobs}($user);
            if ( @jobs > 0 ) {
                my $j = join q{ }, sort { $a <=> $b } @jobs;
                print "$j\n";
            } else {
                print "No active jobs.\n";
            }
        }
        exit 0;
    }

    if ($core_stack) {
        if ( not defined $core_name or not defined $exe_name ) {
            print
              "Usage $0 --core-stack --core=<corefile> --exe=<executable>\n";
            exit 1;
        }
        if ( not -f $exe_name ) {
            print "Error: executable file '$exe_name' does not exist!\n";
            exit 1;
        }
        if ( not -f $core_name ) {
            print "Error: core file '$core_name' does not exist!\n";
            exit 1;
        }
        stack_from_core( $exe_name, $core_name );
        exit 0;
    }

    if ($full_report) {

        find_rmgr();

        if ( not job_is_running( $full_report, $user ) ) {
            print {*STDERR}
"Job $full_report is not active, use --show-jobs to see active jobs\n";
            exit 1;
        }

        print "padb version $version\n";
        print "full job report for job $full_report\n\n";

        push_command( 'mqueue', 'compress' );

        push_command('deadlock');

        my $c = $conf{mode_options}{stack};
        $c->{strip_above_wait}   = 0;
        $c->{stack_shows_params} = 1;
        $c->{stack_shows_locals} = 1;
        push_command( 'stack', 'tree', $c );

        go_job($full_report);
        exit 0;
    }

    if ($show_jobs) {
        find_rmgr();
        my @jobids = get_all_jobids($user);
        print "@jobids\n";
        exit 0;
    }

    if ($local_stats) {

        if ($watch) {
            while (1) {
                maybe_clear_screen();
                local_stats();
                sleep $conf{interval};
            }
        } else {
            local_stats();
        }
        exit 0;
    }

    if ( $all or $any ) {
        if ( @ARGV != 0 ) {
            cmdline_error(
                "$prog: Error: --all incompatible with specific ids\n");
        }
    } elsif ( !$input_file ) {
        if ( @ARGV == 0 ) {
            cmdline_error(
                "$prog: Error: no jobs specified, use --all or jobids\n");
        }
    }

    if ( ( grep { $_ } ( $any, $all, $input_file ) ) > 1 ) {
        cmdline_error(
            "$prog: Error: only specify one of --all, --any or --input_file,\n"
        );
    }

    my $style_count =
      ( grep { $_ } ( $output_compress, $output_compress_long, $output_tree ) );
    if ( $style_count > 1 ) {
        cmdline_error(
"$prog: Error: only specify one of --compress, --compress-long or --tree\n"
        );
    }

    if ( not $input_file
        and ( $have_allfns_option != 1 ) )
    {
        cmdline_error(
"$prog: Error: you must specify only one of -x, -S, -s, -g, -q, -X or --kill\n"
        );
    }

    if ( $output_tree
        and not( ( defined $mode and $mode eq 'stack' ) or $input_file ) )
    {
        cmdline_error("$prog: Error: --tree only works with --stack-trace\n");
    }

    if ( defined $input_file ) {
        my $m = 'input';
        if ( defined $mode ) {
            $m = $mode;
        }
        load_and_display_from_file( $input_file, $m );
        exit 0;
    }

    my @jobids;

    if ( $any or $all ) {

        find_any_rmgr($user);

        @jobids = get_all_jobids($user);
        printf "Active jobs (%d) are @jobids\n", $#jobids + 1
          if $conf{verbose};
        if ( @jobids == 0 ) {
            print "No active jobs could be found for user '$user'\n";
            exit 1;
        }
        if ( $any && @jobids > 1 ) {
            print "More than 1 active job (@jobids) for user '$user'\n";
            exit 1;
        }
    } else {
        find_rmgr();

        foreach my $jobid (@ARGV) {
            if ( job_is_running( $jobid, $user ) ) {
                push @jobids, $jobid;
            } else {
                print {*STDERR} "Job $jobid is not active\n";
            }
        }
    }

    if ( @jobids > 1 and $watch ) {
        print "Cannot use --watch with more than one job\n";
        exit 1;
    }

    foreach my $jobid (@jobids) {

        print "\nCollecting information for job '$jobid'\n\n"
          if ( $conf{verbose} or ( @jobids > 1 ) );

        my $of;
        $of = 'tree'       if $output_tree;
        $of = 'compress'   if $output_compress;
        $of = 'compress_c' if $output_compress_long;
        push_command( $mode, $of, $conf{mode_options}{$mode} );
        go_job($jobid);
    }
    return;
}

###############################################################################
#
# Inner.
#
###############################################################################

# The code below here used to be in a separate script (padb-helper.pl) but
# it's become apparent that for ease-of-distribution padb works better if
# it is self-contained in one file.  Now we just have a big switch on
# ARGV[0] and either run the inner or outer code depending on if it's set
# or not.

my %inner_conf;

my %inner_output;
my %inner_ns_output;
my %local_target_data;

sub output_namespace {
    my ( $rank, $ns, $str ) = @_;

    push @{ $inner_ns_output{$rank}{$ns} }, $str;
    return;
}

sub output {
    my ( $vp, $str ) = @_;

    if ( not defined $str ) {
        carp('no output');
    }

    push @{ $inner_output{$vp} }, $str;
    return;
}

# Report a single string error for a given target rank.
sub target_error {
    my ( $rank, $error ) = @_;
    target_key_pair( $rank, 'ERROR', $error );
    return;
}

# Report a single string error for a given target rank.  By convention (for
# now) this is called with UPPERCASE keys for control/padb values and
# lowercase keys for mode values.
sub target_key_pair {
    my ( $rank, $key, $value ) = @_;

    if ( defined $local_target_data{$key}{$value} ) {
        rng_add_value( $local_target_data{$key}{$value}, $rank );
    } else {
        $local_target_data{$key}{$value} = rng_convert_from_user($rank);
    }
    return;
}

sub p_die {
    my ( $vp, $str ) = @_;
    croak($str);
}

sub is_parent_resmgr {
    my $pid = shift;
    my $parent_pid = find_from_status( $pid, 'PPid' );
    return is_resmgr_process($parent_pid);
}

# This used to happen on ia64 when gdb segfaulted, I've not seen that for a
# number of years however so lets try not doing it for a while and see
# where that gets us.

# Enabling this again until I can fix things properly as it's causing problems.
# if gdb isn't installed then we get a SIGPIPE (which is normally fatal) when
# starting it, previously this would happen inside an eval block but now with
# the global attach it doesn't.  Ideally we should catch the case where gdb isn't
# there and pass the error back up but that is tricky to do so for now simply
# catch the signal, we won't then be able to attach and will report a sensible
# error message to the user.
$SIG{PIPE} = 'IGNORE';

sub gdb_start {
    my ( $exe, $core ) = @_;
    my $gdb = {
        gdbpid   => -1,
        tracepid => -1,
        attached => 0,
        pa       => 0,
        lang     => 'auto',
        debug    => 0,
        seq      => 1,
    };

    my $cmd = 'gdb --interpreter=mi -q';
    if ( defined $core ) {
        $cmd .= " $exe $core";
    }

    $gdb->{gdbpid} = open3( $gdb->{wtr}, $gdb->{rdr}, $gdb->{err}, $cmd )
      or croak "Unable to popen() gdb: $!";

    if ( $gdb->{debug} ) {
        my ( $fh, $file ) = tempfile("/tmp/padb-gdb-debug-log-XXXXXX");
        $gdb->{debugfd} = $fh;
    }

    return $gdb;
}

sub gdb_quit {
    my ($gdb) = @_;
    if ( $gdb->{seq} == 1 ) {
        foreach my $fdname (qw(rdr wtr err)) {
            next unless exists $gdb->{$fdname};
            close $gdb->{$fdname};
        }
        if ( defined $gdb->{debugfd} ) {
            close $gdb->{debugfd};
        }
        return;
    }
    gdb_send( $gdb, 'quit' );
    waitpid $gdb->{gdbpid}, 0;
    foreach my $fdname (qw(rdr wtr err)) {
        next unless exists $gdb->{$fdname};
        close $gdb->{$fdname};
    }
    if ( defined $gdb->{debugfd} ) {
        close $gdb->{debugfd};
    }

    return;
}

sub gdb_attach {
    my ( $gdb, $pid ) = @_;

    if ($running_on_solaris) {
        my $exe = readlink("/proc/$pid/path/a.out");
        my %cs = gdb_n_send( $gdb, "file $exe" );
        if ( $cs{status} ne 'done' ) {
            return;
        }
    }

    send_cont_signal($pid);
    my %p = gdb_n_send( $gdb, "attach $pid" );

    if ( not defined $p{status} ) {
        $gdb->{error} = 'Failed to attach to process';
        if ( not find_exe('gdb') ) {
            $gdb->{error} = 'Failed to attach to process (is gdb installed?)';
        }
        return;
    }

    if ( $p{status} eq 'error' ) {
        my $r = gdb_parse_reason( $p{reason} );
        if ( defined $r->{msg} ) {
            $gdb->{error} = "Failed to attach to process: $r->{msg}";
            my $yama_file = "/proc/sys/kernel/yama/ptrace_scope";
            if ( -f $yama_file ) {
                my $yama = slurp_file($yama_file);
                if ( $yama != '0' ) {
                    $gdb->{error} =
"Failed to attach to process: $r->{msg} (try echo 0 > $yama_file)";
                }
            }
        } else {
            $gdb->{error} = 'Failed to attach to process';
        }
        return;
    }

    gdb_attach_post( $gdb, $pid );

    return $pid;
}

sub gdb_attach_post {
    my ( $gdb, $pid ) = @_;

    $gdb->{attached} = 1;
    $gdb->{tracepid} = $pid;

    $gdb->{maps} = read_maps($pid);

    my $open = gdb_read_value( $gdb, 'opal_version_string' );

    if ( defined $open ) {
        $gdb->{runtime}{ompi} = 1;
    }

    my $mpich2 = gdb_read_value( $gdb, 'MPID_GROUP' );

    if ( defined $mpich2 ) {
        $gdb->{runtime}{mpich2} = 1;
    }

    gdb_n_send( $gdb, '-gdb-set print address off' );
    gdb_n_send( $gdb, '-gdb-set language auto' );

    return;
}

sub gdb_attach_async_start {
    my ( $gdb, $pid ) = @_;

    if ($running_on_solaris) {
        my $exe = readlink("/proc/$pid/path/a.out");
        my %cs = gdb_n_send( $gdb, "file $exe" );
        if ( $cs{status} ne 'done' ) {
            return;
        }
    }

    send_cont_signal($pid);

    _gdb_send_real_async_start( $gdb, "attach $pid" );

    return;
}

sub gdb_attach_async_end {
    my ( $gdb, $pid ) = @_;

    my %p = _gdb_send_real_async_wait( $gdb, "attach $pid" );

    if ( not defined $p{status} ) {
        $gdb->{error} = 'Failed to attach to process';
        if ( not find_exe('gdb') ) {
            $gdb->{error} = 'Failed to attach to process (is gdb installed?)';
        }
        return;
    }

    if ( $p{status} eq 'error' ) {
        my $r = gdb_parse_reason( $p{reason} );
        if ( defined $r->{msg} ) {
            $gdb->{error} = "Failed to attach to process: $r->{msg}";
            my $yama_file = "/proc/sys/kernel/yama/ptrace_scope";
            if ( -f $yama_file ) {
                my $yama = slurp_file($yama_file);
                if ( $yama != '0' ) {
                    $gdb->{error} =
"Failed to attach to process!: $r->{msg} (try echo 0 > $yama_file)";
                }
            }

        } else {
            $gdb->{error} = 'Failed to attach to process';
        }
        return;
    }

    gdb_attach_post( $gdb, $pid );

    return $pid;
}

sub gdb_detach {
    my ($gdb) = @_;
    my $result = gdb_send( $gdb, '-target-detach' );

    return if ( $result eq 'error' );

    $gdb->{attached} = 0;

    send_cont_signal( $gdb->{tracepid} );
    return;
}

sub gdb_wait_for_prompt {
    my ($gdb) = shift;
    my $handle = $gdb->{rdr};
    while (<$handle>) {

        if ( defined $gdb->{debugfd} ) {
            print { $gdb->{debugfd} } $_;
        }
        return if /^\(gdb\)/;
    }

    return;
}

sub _gdb_send_real {
    my ( $gdb, $cmd ) = @_;
    gdb_wait_for_prompt($gdb);
    my $handle = $gdb->{wtr};
    my $seq    = $gdb->{seq}++;
    print {$handle} "$seq$cmd\n";
    if ( defined $gdb->{debugfd} ) {
        print { $gdb->{debugfd} } "$seq$cmd\n";
    }
    my %r = gdb_n_next_result( $gdb, $seq );
    if ( $gdb->{attached} and $r{seq} ne $seq ) {
        croak(
"Invalid sequence number from gdb, expecting $seq got $r{seq} cmd=\"$cmd\""
        );
    }
    $r{cmd} = $cmd;
    if ( $gdb->{debugfd} and defined $r{status} and $r{status} ne 'done' ) {
        print Dumper \%r;
    }
    return %r;
}

sub _gdb_send_real_async_start {
    my ( $gdb, $cmd ) = @_;
    gdb_wait_for_prompt($gdb);
    my $handle = $gdb->{wtr};
    my $seq    = $gdb->{seq}++;
    print {$handle} "$seq$cmd\n";
    if ( defined $gdb->{debugfd} ) {
        print { $gdb->{debugfd} } "$seq$cmd\n";
    }
    return;
}

sub _gdb_send_real_async_wait {
    my ( $gdb, $cmd ) = @_;
    my $seq = $gdb->{seq};
    my %r = gdb_n_next_result( $gdb, $seq );
    if ( $gdb->{attached} and $r{seq} ne $seq ) {
        croak(
"Invalid sequence number from gdb, expecting $seq got $r{seq} cmd=\"$cmd\""
        );
    }
    $r{cmd} = $cmd;
    if ( $gdb->{debugfd} and defined $r{status} and $r{status} ne 'done' ) {
        print Dumper \%r;
    }
    return %r;
}

sub _gdb_set_print_address {
    my ( $gdb, $flag ) = @_;

    if ( $flag == $gdb->{pa} ) {
        return;
    }

    $gdb->{pa} = $flag;

    if ($flag) {
        _gdb_send_real( $gdb, '-gdb-set print address on' );
    } else {
        _gdb_send_real( $gdb, '-gdb-set print address off' );
    }
    return;
}

sub _gdb_set_lang {
    my ( $gdb, $lang ) = @_;

    if ( $lang eq $gdb->{lang} ) {
        return;
    }

    $gdb->{lang} = $lang;

    _gdb_send_real( $gdb, "-gdb-set language $lang" );

    return;
}

sub gdb_n_send {
    my ( $gdb, $cmd ) = @_;
    _gdb_set_print_address( $gdb, 0 );
    _gdb_set_lang( $gdb, 'auto' );
    return _gdb_send_real( $gdb, $cmd );
}

# Send a command in the c language
sub gdb_send_c {
    my ( $gdb, $cmd ) = @_;
    _gdb_set_print_address( $gdb, 1 );
    _gdb_set_lang( $gdb, 'c' );
    return _gdb_send_real( $gdb, $cmd );
}

# Send a command with print address enabled.
sub gdb_send_addr {
    my ( $gdb, $cmd ) = @_;
    _gdb_set_print_address( $gdb, 1 );
    _gdb_set_lang( $gdb, 'auto' );
    return _gdb_send_real( $gdb, $cmd );
}

sub gdb_send {
    my ( $gdb, $cmd ) = @_;
    my %p = gdb_n_send( $gdb, $cmd );
    return $p{status};
}

sub gdb_strip_square {
    my $str = shift;
    if ( $str =~ m{\A\[(.*)\]\z}x ) {
        return $1;
    }
    croak "Cannot strip square braces from $str";
}

sub gdb_strip_braces {
    my ($str) = @_;
    if ( $str =~ m{\A{(.*)}\z}x ) {
        return $1;
    }
    croak "Cannot strip curly braces from $str";
}

sub gdb_strip_first_quotes {
    my ($str) = @_;

    # This is in part stolen from the "match a double-quoted string"
    # section of http://perldoc.perl.org/perlre.html
    if (
        $str =~ m{\A                    # Start of str.
                   "                    # Quote
                   ((?:[^"\\]+|\\.)*)   # Anyting which isn't \"
                   "                    # Close quote
                   ,?                   # An optional comma.
                   (.*)                 # Rest of line
                   \z                   # end.
                   }xms
      )
    {
        my $value = $1;
        my $rem   = $2;
        $value =~ s{\\\\}{\\}xg;
        $value =~ s{\\"}{"}xg;
        if ( not defined $rem or $rem eq $EMPTY_STRING ) {
            return $value;
        } else {
            return ( $value, $rem );
        }

    } else {
        croak("Failed to strip quotes from $str");
    }
    return;
}

# Has to return key (str) value (string) extra(string)
sub gdb_extract_value_square {
    my ($str) = @_;

    my $value  = $EMPTY_STRING;
    my $rem    = $str;
    my $indent = 0;

    # Walk through the string matching [ and ] until the number of ]
    # matches the number of [.
    while (
        $rem =~ m{\A          # Start of line.
                  ([^\[\]]*)  # Not a square brace.
                  ([\[\]])    # A Square brace.
                  ,?          # An optional comma
                  (.*)        # The rest of the line.
                  \z          # End of line.
                 }x
      )
    {
        my $br = $2;
        $rem = $3;
        $value .= $1 . $br;
        if ( $br eq "[" ) {
            $indent++;
        } else {
            $indent--;
            if ( $indent == 0 ) {
                if ( $rem eq $EMPTY_STRING ) {
                    return ( gdb_strip_square($value) );
                } else {
                    return ( gdb_strip_square($value), $rem );
                }
            }
        }

    }
    croak("Failed to extract square braces from $str");
}

sub gdb_extract_value_braces {
    my $str = shift;

    my $value  = $EMPTY_STRING;
    my $rem    = $str;
    my $indent = 0;

    while (
        $rem =~ m{
                      \A
                      ([^\{\}]*)
                      ([\{\}])
                      ,?
                      (.*)
                      \z
                     }x
      )
    {
        my $br = $2;
        $value .= $1 . $br;
        $rem = $3;
        if ( $br eq "{" ) {
            $indent++;
        } else {
            $indent--;
            if ( $indent == 0 ) {
                if ( $rem eq $EMPTY_STRING ) {
                    return ( gdb_strip_braces($value) );
                } else {
                    return ( gdb_strip_braces($value), $rem );
                }
            }
        }

    }
    croak("Failed to extrace square braces from $str");
}

# Take a string (from gdb) and a optional field to "collapse on", return a
# key, a value and a left-over string.  See comment and start of
# gdb_parse_reason for documentation.
sub gdb_new_parse {
    my ( $str, $collapse ) = @_;

    # All strings here start with a string.

    my ( $a, $b, $c ) = _gdb_new_parse( $str, $collapse );

    return ( $a, $b, $c );
}

sub _gdb_new_parse {
    my ( $str, $collapse ) = @_;

    my $key;
    my $value;

    # Strings here start with a string, " or {

    if ( $str =~ m{\A([\w\-\?]+)\=(.*)\z}x ) {
        $key   = $1;
        $value = $2;
    } elsif ( $str =~ m{\A\{}x ) {
        $value = $str;
    } elsif ( $str =~ m{\A"}x ) {
        $value = $str;
    } else {
        croak("Cannot parse gdb values from $str");
    }

    my $type = substr $value, 0, 1;
    if ( $type eq "[" ) {
        if ( $value eq "[]" ) {
            my @e;
            return ( $key, \@e );
        }
        my ( $l, $r ) = gdb_extract_value_square($value);

        my @b;
        while ( defined $l ) {
            my ( $kk, $vv );
            ( $kk, $vv, $l ) = _gdb_new_parse( $l, $collapse );

            if (   ( not defined $kk )
                or ( ( defined $collapse ) and ( $kk eq $collapse ) ) )
            {
                push @b, $vv;
            } else {
                push @b, { $kk => $vv };
            }
        }
        return ( $key, \@b, $r );
    } elsif ( $type eq "{" ) {
        my ( $l, $r ) = gdb_extract_value_braces($value);

        my @all;
        my %res;

        # gdb_extract_value_braces returns an empty string if the gdb
        # output contains {} so handle this case correctly here.
        while ( defined $l and $l ne "" ) {
            my ( $kk, $vv );
            ( $kk, $vv, $l ) = _gdb_new_parse( $l, $collapse );

            if (    defined $key
                and defined $collapse
                and $collapse eq 'thread-ids' )
            {
                push @all, { $kk => $vv };
            } else {
                $res{$kk} = $vv;
            }
        }
        if ( defined $key and defined $collapse ) {
            if ( $collapse eq 'thread-ids' ) {
                return ( $key, \@all, $r );
            }
        }
        return ( $key, \%res, $r );
    } elsif ( $type eq "\"" ) {
        my ( $this, $l ) = gdb_strip_first_quotes($value);
        return ( $key, $this, $l );
    } else {
        confess("unknown type '$type' str '$str'");
    }

    return;
}

# Convert from the single-line string gdb gives back to a abstract perl
# datatype.  The format gdb uses is documented here:
# http://sources.redhat.com/gdb/current/onlinedocs/gdb_26.html#SEC275
#
# The options $collapse argument here is for element names that should be
# collapsed into an array, for example in the following example each
# instance if thread-id would over-write the previous one with it's own
# value so to avoid this thread-ids (note the extra "s" here is passed as
# collapse to force them into an array.

# thread-ids={thread-id="6",thread-id="5",thread-id="4",thread-id="3",thread
# -id="2",thread-id="1"},current-thread-id="1",number-of-threads="6"

# $collapse is also specified for "frame" in a number of cases, this is to
# make the resulting datatype easier to parse rather than to prevent losing
# data as in the above case.

sub gdb_parse_reason {
    my ( $str, $collapse ) = @_;

    my $leftover = $str;
    my %res;
    while ( defined $leftover ) {
        my ( $key, $value );
        ( $key, $value, $leftover ) = gdb_new_parse( $leftover, $collapse );
        $res{$key} = $value;
    }

    return \%res;
}
#########################################################################

sub gdb_n_next_result {
    my ($gdb) = @_;
    my $handle = $gdb->{rdr};

    my %res;

    while (<$handle>) {

        if ( defined $gdb->{debugfd} ) {
            print { $gdb->{debugfd} } $_;
        }

        #printf("Line $_\n");
        return %res if /^\(gdb\)/;

        #if (/\~\"(.*)\"\n/) {    #"
        #    $res{raw} .= $1;
        #}
        #if (/\&\"(.*)\"\n/) {    #"
        #    $res{debug} .= $1;
        #}
        if (m{\A(\d+)\^(done|error),?(.*)\Z}x) {
            my $seq    = $1;
            my $status = $2;
            my $reason = $3;
            $res{status} = $status;
            $res{seq}    = $seq;
            if ( defined $reason and $reason ne $EMPTY_STRING ) {
                $res{reason} = $reason;
            }

            #if ( defined $res{raw} ) {
            #    $res{raw} =~ s/\\n/\n/g;
            #    chomp $res{raw};
            #}
            #if ( defined $res{debug} ) {
            #    $res{debug} =~ s/\\n/\n/g;
            #    chomp $res{debug};
            #}
            return %res;
        }
    }

    #if ( defined $res{raw} ) {
    #    $res{raw} =~ s/\\n/\n/g;
    #    chomp $res{raw};
    #}
    #if ( defined $res{debug} ) {
    #    $res{debug} =~ s/\\n/\n/g;
    #    chomp $res{debug};
    #}

    if ( $gdb->{attached} ) {
        croak("Unexpected EOF from gdb");
    }

    return %res;
}

sub gdb_strip_value {
    my ($str) = @_;
    if ( $str =~ m{\Avalue="([^"]+)"\z}x ) {
        return $1;
    }
    return;
}

sub gdb_type_size {
    my ( $gdb, $type ) = @_;
    my %p = gdb_n_send( $gdb, "-data-evaluate-expression \"sizeof($type)\"" );
    return unless ( $p{status} eq 'done' );
    return gdb_strip_value( $p{reason} );
}

# Add a void * cast here to stop gdb trying to evaluate the type and
# telling us that the offset isn't a valid pointer.  Without the cast gdb
# adding extra text after the value which is causing hex to throw warnings.
sub gdb_type_offset {
    my ( $gdb, $type, $field ) = @_;
    my %p = gdb_send_c( $gdb,
        "-data-evaluate-expression \"(void *)&(($type *)0)->$field\"" );
    return unless ( $p{status} eq 'done' );
    return hex gdb_strip_value( $p{reason} );
}

sub gdb_func_addr {
    my ( $gdb, $func ) = @_;
    my %p = gdb_send_addr( $gdb, "-data-evaluate-expression $func" );
    return unless ( $p{status} eq 'done' );
    my $value = gdb_strip_value( $p{reason} );
    my @a     = split $SPACE, $value;
    my $hex   = $a[-2];
    return $hex;
}

sub gdb_var_addr {
    my ( $gdb, $var ) = @_;
    my %p = gdb_send_addr( $gdb, "-data-evaluate-expression \"&($var)\"" );
    return unless ( $p{status} eq 'done' );
    return gdb_strip_value( $p{reason} );
}

sub gdb_read_raw {
    my ( $gdb, $ptr, $size ) = @_;

    my @d;
    my $offset = 0;
    my $count  = 256;
    do {
        $count = $size if ( $size < $count );
        my %p =
          gdb_n_send( $gdb, "-data-read-memory -o $offset $ptr x 1 1 $count" );
        $offset += $count;

        return unless ( $p{status} eq 'done' );
        my $val = gdb_parse_reason( $p{reason}, 'thread-ids' );
        push @d, @{ $val->{memory}[0]{data} };

    } while ( $offset < $size );
    return @d[ 0 .. $size - 1 ];
}

sub gdb_string {
    my ( $gdb, $len, $strp ) = @_;
    my $str = $EMPTY_STRING;
    my @s = gdb_read_raw( $gdb, $strp, $len );
    return if ( not defined $s[0] );
    foreach my $d (@s) {
        my $v = hex $d;
        return $str if ( $v == 0 );
        $str .= sprintf '%c', $v;
    }
    return $str;
}

sub gdb_read_pointer {
    my ( $gdb, $addr ) = @_;

    # Quote the request in case it contains spaces.
    my %t =
      gdb_send_addr( $gdb, "-data-evaluate-expression \"*(void **)$addr\"" );
    if ( $t{status} eq 'done' ) {
        my $v = gdb_parse_reason( $t{reason} );
        return $v->{value};
    }
    return;
}

sub gdb_read_int {
    my ( $gdb, $addr ) = @_;

    # Quote the request in case it contains spaces.
    my %t =
      gdb_send_addr( $gdb, "-data-evaluate-expression \"*(int *)$addr\"" );
    if ( $t{status} eq 'done' ) {
        my $v = gdb_parse_reason( $t{reason} );
        return $v->{value};
    }
    return;
}

sub gdb_read_value {
    my ( $gdb, $name ) = @_;

    # Quote the request in case it contains spaces.
    my %t = gdb_n_send( $gdb, "-data-evaluate-expression \"$name\"" );
    if ( $t{status} eq 'done' ) {
        my $v = gdb_parse_reason( $t{reason} );
        return $v->{value};
    }
    return;
}

sub gdb_read_value_addr {
    my ( $gdb, $name ) = @_;

    # Quote the request in case it contains spaces.
    my %t = gdb_send_addr( $gdb, "-data-evaluate-expression \"$name\"" );
    if ( $t{status} eq 'done' ) {
        my $v = gdb_parse_reason( $t{reason} );
        return $v->{value};
    }
    return;
}

# For a given type load it's type!  Takes a type name and returns (if
# possible) a hash representing the underlying type in the target.
# Works best if calls on structs where it returns the names and types
# if the struct entries.
sub gdb_load_type {
    my ( $gdb, $type ) = @_;

    my %t = gdb_n_send( $gdb, "-var-create - * $type" );

    return unless ( defined $t{status} and $t{status} eq 'done' );

    my $reason = gdb_parse_reason( $t{reason} );

    my %v = gdb_n_send( $gdb, "-var-list-children 1 $reason->{name}" );
    return unless ( defined $v{status} and $v{status} eq 'done' );

    my $z = gdb_parse_reason( $v{reason} );

    my %type;

    foreach my $c ( @{ $z->{children} } ) {
        my $child = $c->{child};

        $type{ $child->{exp} } = $child->{type};
    }

    return \%type;
}

sub minfo_handle_query {
    my ( $gdb, $vp, $query, $stats ) = @_;

    my ( undef, $cmd, @params ) = split $SPACE, $query;
    my $res;
    return 'fail' unless defined $cmd;

    $stats->{$cmd}++;

    my %dispatch_1 = (
        size => \&gdb_type_size,
        func => \&gdb_func_addr,
        sym  => \&gdb_var_addr,
    );

    my %dispatch_2 = (
        offset => \&gdb_type_offset,
        string => \&gdb_string,
    );

    if ( defined $dispatch_1{$cmd} ) {
        $res = $dispatch_1{$cmd}( $gdb, $params[0] );
    } elsif ( defined $dispatch_2{$cmd} ) {
        $res = $dispatch_2{$cmd}( $gdb, $params[0], $params[1] );
    } elsif ( $cmd eq 'data' ) {
        my @r = gdb_read_raw( $gdb, $params[0], $params[1] );
        if ( defined $r[0] ) {
            $res = "@r";
            $stats->{databytes} += $params[1];
        }
    } elsif ( $cmd eq 'rank' ) {
        $res = $vp;
    } elsif ( $cmd eq 'image' ) {
        my $image = proc_link( $gdb->{tracepid} );
        if ( defined $image ) {
            $res = $image;
        }
    } else {
        print "Unhandled query $query\n";
    }
    if ( defined $res ) {
        return "ok $res";
    }
    $stats->{errors}++;

    return 'fail';
}

sub run_minfo {
    my ( $carg, $gdb, $vp ) = @_;

    my $h = {
        hpid     => -1,
        tracepid => -1,
        attached => 0,
        debug    => 0,
    };

    $h->{fd}{err} = *M_ERROR;

    my @all_dll_filenames;

    # If supplied with a value of minfo then use it otherwise pick the
    # version that was installed with padb.
    my $minfo = $carg->{minfo};

    if ( not defined $minfo ) {
        $minfo = find_minfo();
    }

    if ( defined $carg->{mpi_dll} ) {
        push @all_dll_filenames, $carg->{mpi_dll};
    } else {
        my $loc = gdb_var_addr( $gdb, 'mpimsgq_dll_locations' );
        my $base;
        if ($loc) {
            $base = gdb_read_pointer( $gdb, $loc );
        }

        if ( defined $base and $base ne '0x0' ) {
            my $psize = gdb_type_size( $gdb, 'void *' );

            my $filename;

            do {
                my $strp = gdb_read_pointer( $gdb, $base );
                $filename = gdb_string( $gdb, 1024, $strp );
                if ( defined $filename ) {
                    push @all_dll_filenames, $filename;
                }
                $base = _hex($base) + $psize;
            } while ( defined $filename );
        }

        $base = gdb_var_addr( $gdb, 'MPIR_dll_name' );
        if ( not defined $base ) {
            target_error( $vp,
'Process does not appear to be using MPI (No MPIR_dll_name symbol)'
            );
            return;
        }
        my $filename = gdb_string( $gdb, 1024, $base );
        push @all_dll_filenames, $filename;
    }

    my @dll_filenames;

    my %files;
    foreach my $filename (@all_dll_filenames) {
        next unless -f ($filename);
        next if defined $files{$filename};

        push @dll_filenames, $filename;
        $files{$filename} = 1;
    }

    $h->{hpid} = open3( $h->{fd}{wtr}, $h->{fd}{rdr}, *M_ERROR, $minfo )
      or confess "Unable to popen() h: $!\n";

    if ( $h->{debug} ) {
        my ( $fh, $file ) = tempfile("/tmp/padb-minfo-debug-log-$vp-XXXXXX");
        $h->{debugfd} = $fh;
    }

    my $handle = $h->{fd}{rdr};

    my $out = $h->{fd}{wtr};

    my %stats;

    # Communicator data.
    my %communicator_descriptor;
    my %message_descriptor;

    my %global;

    $global{exit} = 'unknown';

    my @communicator_list;
    my $bytes_to_read;
    my $str_name;
    my $str_value = $EMPTY_STRING;
    my $str_global;
    while (<$handle>) {
        my $r = $_;

        if ( defined $h->{debugfd} ) {
            print { $h->{debugfd} } $r;
        }

        if ( defined $bytes_to_read ) {
            $str_value .= $r;
            if ( length $str_value eq $bytes_to_read + 1 ) {
                chomp $str_value;
                if ($str_global) {
                    $global{$str_name} = $str_value;

                    if ( $str_name eq 'ihqm' ) {
                        my $image = proc_link( $gdb->{tracepid} );
                        $str_value =~ s{%s}{$image};
                    }

                    if ( $str_name ne 'exit' and $str_name ne 'dmsg' ) {

                        # Report the string back to the outer process,
                        # don't bother forwarding exit status as that's
                        # done below.
                        target_key_pair( $vp, "minfo_msg_$str_name",
                            $str_value );
                        target_key_pair( $vp, 'minfo_msg', $str_name );
                    }
                } else {
                    $communicator_descriptor{$str_name} = $str_value;
                }
                $bytes_to_read = undef;
                $str_value     = "";
            }
            next;
        }

        chomp $r;
        my $cmd = substr $r, 0, 4;
        if ( $r eq 'req: dll_filename' ) {
            $stats{dll_files}++;
            my $filename = shift @dll_filenames;
            my $res      = 'fail';
            if ( defined $filename ) {
                $res = "ok $filename";
            }

            print {$out} "$res\n";

            if ( defined $h->{debugfd} ) {
                print { $h->{debugfd} } "$res\n";
            }

        } elsif ( $cmd eq 'req:' ) {
            my $res = minfo_handle_query( $gdb, $vp, $r, \%stats );

            # Some things *do* fail here, symbol lookups for example,
            # and we don't need to report it.

            print {$out} "$res\n";

            if ( defined $h->{debugfd} ) {
                print { $h->{debugfd} } "$res\n";
            }

        } elsif ( $cmd eq 'out:' ) {
            $stats{out}++;
            if (
                $r =~ m{\A
                        out:
                        [ ]
                        c:(\d+)
                        [ ]
                        (\w+):
                        (\d+)
                        [ ]?
                        (\w+)?
                        \z
                        }x
              )
            {
                my $cid   = $1;
                my $key   = $2;
                my $value = $3;
                my $name  = $4;

                if ( $key eq 'str' ) {
                    $bytes_to_read = $value;
                    $str_name      = $name;
                    $str_global    = 0;
                } elsif ( $key eq 'rt' ) {
                    push @{ $communicator_descriptor{rt} }, $value;
                } else {
                    $communicator_descriptor{$key} = $value;
                    $communicator_descriptor{mid} = $cid;
                }
            } else {
                target_error( $vp, "UNPARSEABLE MINFO: $r" );
            }
        } elsif ( $cmd eq 'Msg:' ) {
            $stats{msg}++;
            if (
                $r =~ m{\A
                        Msg:
                        [ ]
                        (\w+):
                        (-?\d+)
                        \z
                        }x
              )
            {
                my $key   = $1;
                my $value = $2;

                if ( $key eq 'start' ) {
                    undef %message_descriptor;
                } elsif ( $key eq 'done' ) {
                    push @{ $communicator_descriptor{messages} },
                      dclone( \%message_descriptor );
                    undef %message_descriptor;
                } else {
                    $message_descriptor{$key} = $value;
                }
            } else {
                target_error( $vp, "UNPARSEABLE MINFO: $r" );
            }
        } elsif ( $cmd eq 'zzz:' ) {
            $stats{zzz}++;
            if (
                $r =~ m{\A
                        zzz:
                        [ ]
                        (\w+):
                        (\d+)
                        [ ]?
                        (\w+)?
                        \z
                        }x
              )
            {
                my $key    = $1;
                my $length = $2;
                my $name   = $3;

                if ( $key eq 'str' ) {
                    $bytes_to_read = $length;
                    $str_name      = $name;
                    $str_global    = 1;
                }
            } else {
                target_error( $vp, "UNPARSEABLE MINFO: $r" );
            }
        } elsif ( $cmd eq 'done' ) {
            push @communicator_list, dclone( \%communicator_descriptor );
            undef %communicator_descriptor;
        } else {
            $stats{raw}++;
            push @{ $message_descriptor{raw} }, $r;
        }
    }

    # Belt and braces approach, minfo shouldn't have any output on
    # stderr which means that if it does then it's a bug.  Check that
    # and report it to the user as a problem.
    my $eh                  = $h->{fd}{err};
    my $stderr              = $EMPTY_STRING;
    my $have_error_messages = 0;
    while (<$eh>) {
        $have_error_messages = 1;
        $stderr .= $_;
    }
    if ($have_error_messages) {
        target_error( $vp, "Stderr from minfo:\n$stderr" );
    }

    my $sc = keys %stats;

    waitpid $h->{hpid}, 0;
    foreach my $fd ( values %{ $h->{fd} } ) {
        close $fd;
    }

    if ( $sc == 0 ) {

        # No interaction was had with minfo, abort with nothing.
        target_error( $vp, "Error running $minfo: No contact" );
        return;
    }

    if ( $global{exit} ne 'ok' ) {
        if ( $global{exit} eq 'die' ) {
            target_error( $vp, "Error message from $minfo: $global{dmsg}" );

        } else {
            target_error( $vp, "Error running $minfo: Bad exit code $?" );
        }
    }

    return minfo_to_array( \@communicator_list );

}

sub minfo_to_array {
    my ($cd) = @_;

    my @mq;
    foreach my $comm ( @{$cd} ) {

        push @mq, "comm$comm->{mid}: name: '$comm->{name}'";
        if ( defined $comm->{rank} ) {
            push @mq, "comm$comm->{mid}: rank: '$comm->{rank}'";
        }
        push @mq, "comm$comm->{mid}: size: '$comm->{size}'";
        my $id = sprintf "%#Lx", $comm->{id};
        push @mq, "comm$comm->{mid}: id: '$id'";

        for my $i ( 0 .. $#{ $comm->{rt} } ) {
            push @mq, "comm$comm->{mid}: Rank: local $i global $comm->{rt}[$i]";
        }

        my $mid = 0;

        foreach my $m ( @{ $comm->{messages} } ) {
            my @op_desc = qw(pending_send pending_receive unexpected_message);
            my @status_desc = qw(pending matched complete);
            my $op = "Operation $m->{op_type} ($op_desc[$m->{op_type}])";
            $op .= " status $m->{status} ($status_desc[$m->{status}])";
            push @mq, "msg$mid: $op";
            push @mq,
"msg$mid: Rank local $m->{desired_local_rank} global $m->{desired_global_rank}";
            if ( defined $m->{actual_global_rank} ) {
                push @mq,
"msg$mid: Actual local $m->{actual_local_rank} global $m->{actual_global_rank}";
            }

            if ( defined $m->{actual_length} ) {
                push @mq,
"msg$mid: Size desired $m->{desired_length} actual $m->{actual_length}";
            } else {
                push @mq, "msg$mid: Size desired $m->{desired_length}";
            }
            push @mq, "msg$mid: tag_wild $m->{tag_wild}";

            if ( defined $m->{actual_tag} ) {
                push @mq,
"msg$mid: Tag desired $m->{desired_tag} actual $m->{actual_tag}";
            } else {
                push @mq, "msg$mid: Tag desired $m->{desired_tag}";
            }

            push @mq, "msg$mid: system_buffer $m->{system_buffer}";

            foreach my $l ( @{ $m->{raw} } ) {
                push @mq, "msg$mid: $l";
            }

            $mid++;
        }
    }
    return @mq;
}

# Send a CONT signal to a pid, there have been problems where a program is
# in "T" state which causes the attach to hang forever.  Send the process a
# signal before attaching to wake it up in case this is the case.  gdb
# crashing (yes it does happen) is a common case for processes to be
# stopped so always deliver this signal before and after attaching.
sub send_cont_signal {
    my $pid = shift;
    kill 'CONT', $pid;
    return;
}

# Returns the MPI queues for this process given a gdb handle.
sub fetch_mpi_queue_gdb {
    my ( $carg, $vp, $pid, $g ) = @_;
    my @mq = run_minfo( $carg, $g, $vp );
    return @mq;
}

# Called as a backoff from the qsnet_show_tport_queue() function if it gets
# called but can't show the queues for any reason - most likely because it
# isn't actually a quadrics system.
sub show_mpi_queue {
    my ( $carg, $vp, $pid ) = @_;

    my $g = gdb_start();
    my $p = gdb_attach( $g, $pid );
    if ( !$p ) {
        if ( defined $g->{error} ) {
            target_error( $vp, $g->{error} );
        } else {
            target_error( $vp, 'Failed to attach to process' );
        }
        return;
    }

    my @mq = fetch_mpi_queue_gdb( $carg, $vp, $pid, $g );

    gdb_detach($g);
    gdb_quit($g);

    foreach my $o (@mq) {
        output( $vp, $o );
    }
    return;
}

# The mode handler for message queue or deadlock detection mode.
sub show_mpi_queue_one {
    my ( $carg, $proc ) = @_;

    my $vp  = $proc->{vp};
    my $pid = $proc->{pid};
    my $gdb = $proc->{gdb_handle};

    return unless $gdb;

    my @mq = fetch_mpi_queue_gdb( $carg, $vp, $pid, $gdb );

    foreach my $o (@mq) {
        output( $vp, $o );
    }
    return;
}

sub mpi_queue_output_handler {
    my ( $carg, $lines, $three ) = @_;

    my %headers = (
        ihqm            => 'Message from DLL',
        phqm            => 'Message from DLL',
        dllerror        => 'Error string from DLL',
        warning         => 'Warning message from minfo',
        dlldebugmessage => 'Debug message from DLL',
    );

    if ( exists $lines->{target_data}{minfo_msg} ) {
        my @keys = sort keys %{ $lines->{target_data}{minfo_msg} };

        foreach my $key (@keys) {
            my @values = keys %{ $lines->{target_data}{"minfo_msg_$key"} };
            my $head;
            if ( defined $headers{$key} ) {
                $head = $headers{$key};
            } else {
                $head = "Message from minfo/dll using unknown key: '$key'";
            }
            foreach my $value ( sort @values ) {
                print "----------------\n";
                printf

                  rng_convert_to_user(
                    $lines->{target_data}{"minfo_msg_$key"}{$value} )
                  . ": $head\n";
                print "----------------\n";
                printf "%s\n", $value;
            }
        }
    }

    default_output_handler( $three, $lines );
    return;
}

sub mpi_go_deadlock_detect_helper {
    my $str        = shift;    # tagged onto the end of the line.
    my $possessive = shift;    # syntax to use (possessive/attributive)
    my $size       = shift;    # size of the group
    my @identical  = (@_);     # member list
    my $ret;
    my $sstr = defined $size ? " (size $size)" : $EMPTY_STRING;

    my $members = 'members';
    my $are     = 'are';
    my $have    = 'have';

    if ( @identical == 1 ) {
        $members = 'member';
        $are     = 'is';
        $have    = 'has';
    }

    if ($possessive) {
        $are = $have;
    }

    $ret .= sprintf "Group $members %s$sstr $are $str.\n",
      rng_convert_to_user( rng_create_from_array(@identical) );

    return $ret;
}

sub mpi_go_deadlock_detect {
    my ( $carg, $cd ) = @_;

    my %ad;

    my %tg;

    if ( @target_groups != 0 ) {
        foreach my $gid (@target_groups) {
            $tg{$gid}++;
        }
    }

    my $no_data = 0;

    foreach my $process ( keys %{$cd} ) {
        my $rd = $cd->{$process};
        foreach my $g ( keys %{$rd} ) {
            my $gd  = $rd->{$g};
            my $gid = $gd->{id};

            if ( $gd->{size} == 1 ) {
                $gid = "$gd->{id}($process)";
            }
            if ( defined $gd->{ranks}{0} ) {
                $gid = "$gd->{id}($gd->{ranks}{0})";
            }

            if ( @target_groups != 0 ) {
                next unless defined $tg{$gid};
            }

            if ( $gd->{size} > 0 ) {
                $ad{$gid}{map}[ $gd->{rank} ] = $process;
            }
            $ad{$gid}{size} = $gd->{size};
            $ad{$gid}{name} = $gd->{name};
            if ( not exists $gd->{coll} ) {
                $no_data++;
            }
            foreach my $coll ( keys %{ $gd->{coll} } ) {
                my $count = $gd->{coll}{$coll}{count};
                if ( defined $gd->{coll}{$coll}{active} ) {
                    $ad{$gid}{active}{$coll}++;
                    $ad{$gid}{idents}{ $gd->{rank} }{active}{$coll} = $count;
                } else {
                    $ad{$gid}{idents}{ $gd->{rank} }{inactive}{$coll} = $count;
                }
            }
        }
    }

    my $ret     = $EMPTY_STRING;
    my $i_count = 0;               # Interesting groups.
         #foreach my $gid ( sort { $a <=> $b } keys %ad ) {

    foreach my $gid ( sort keys %ad ) {

        if ( @target_groups != 0 ) {
            next unless defined $tg{$gid};
        }

        my $gstr = "Information for group '$gid' ($ad{$gid}{name})\n";

        # Maybe show the group members, hope that the user doesn't
        # turn this on unless also setting target_groups!
        if ( $carg->{show_group_members} ) {
            $gstr .= "group has $ad{$gid}{size} members\n";
            if ( defined $ad{$gid}{size} ) {
                for ( my $ident = 0 ; $ident < $ad{$gid}{size} ; $ident++ ) {
                    $gstr .=
                      "group member[$ident] => grank[$ad{$gid}{map}[$ident]]\n";
                }
            }
        }

        if ( $ad{$gid}{active} ) {
            $i_count++;

            # For all collective calls which we are interested in
            foreach my $s ( keys %{ $ad{$gid}{active} } ) {
                my %active;
                my %inactive;

                foreach my $ident ( keys %{ $ad{$gid}{idents} } ) {
                    if ( defined $ad{$gid}{idents}{$ident}{active}
                        and $ad{$gid}{idents}{$ident}{active}{$s} )
                    {
                        my $number = $ad{$gid}{idents}{$ident}{active}{$s};
                        push @{ $active{$number} }, $ident;
                    } elsif ( $ad{$gid}{idents}{$ident}{inactive}{$s} ) {
                        my $number = $ad{$gid}{idents}{$ident}{inactive}{$s};
                        push @{ $inactive{$number} }, $ident;
                    }
                }
                foreach my $number ( sort keys %active ) {
                    $ret .= $gstr
                      . mpi_go_deadlock_detect_helper( "in call $number to $s",
                        0, $ad{$gid}{size}, @{ $active{$number} } );
                    $gstr = $EMPTY_STRING;

                }
                foreach my $number ( sort keys %inactive ) {
                    $ret .= mpi_go_deadlock_detect_helper(
                        "completed call $number to $s",
                        1, $ad{$gid}{size}, @{ $inactive{$number} } );
                }
            }
        } else {
            next unless ( $carg->{show_all_groups} );
            $ret .= $gstr;
            $gstr = $EMPTY_STRING;
        }

        {
            my @inactive;
            foreach my $ident ( sort keys %{ $ad{$gid}{idents} } ) {
                if ( not defined $ad{$gid}{idents}{$ident}{active} ) {
                    push @inactive, $ident;
                }
            }
            if ( @inactive != 0 ) {
                $ret .= $gstr
                  . mpi_go_deadlock_detect_helper(
                    'not in a call to the collectives',
                    0, $ad{$gid}{size}, @inactive );
                $gstr = $EMPTY_STRING;
            }
        }
    }

    my $count = keys %ad;

    if ( $count eq $no_data ) {
        $ret .=
          "Total: $count communicators, no communication data recorded.\n";
        return $ret;
    }

    if ( $count == 1 ) {
        my $use_str = ( $i_count == 1 ) ? $EMPTY_STRING : ' not';
        $ret .= "Total: $count communicators which is$use_str in use.\n";
    } else {
        my $i_str = ( $i_count == 1 ) ? 'is' : 'are';
        $ret .=
          "Total: $count communicators of which $i_count $i_str in use.\n";
    }
    $ret .= "No data was recorded for $no_data communicators\n";

    return $ret;
}

sub mpi_deadlock_detect {
    my ( $carg, $lines ) = @_;
    my $data;

    # XXX This is a bit of a hack to make the deadlock code work with input
    # files, the whole thing is due a tidy-up on the full-duplex branch
    # where this should be solved properly.
    if ( defined $lines->{target_output} ) {
        $data = $lines->{target_output};
    } else {
        $data = $lines->{lines};
    }

    my %coll_data;
    foreach my $rank ( keys %{$data} ) {
        my $r = $data->{$rank};
        my %lid;
        foreach my $line ( @{$r} ) {
            if ( $line =~ /^comm(\d+): (\w+): \'(.*)\'$/ ) {
                $lid{$1}{$2} = $3;
            } elsif ( $line =~ /^comm(\d+): Rank: local (\d+) global (\d+)$/ ) {
                $lid{$1}{ranks}{$2} = $3;
            } elsif ( $line =~
/^comm(\d+): Collective \'(\w+)\': call count (\d+), ([not ]*)active$/
              )
            {
                $lid{$1}{coll}{$2}{count} = $3;
                if ( $4 eq $EMPTY_STRING ) {
                    $lid{$1}{coll}{$2}{active} = 1;
                }
            } elsif ( $line =~ /^msg\d+/ ) {
                ;    # nop
            } else {

                #print "Failed to match minfo output: $line\n";
            }
        }
        $coll_data{$rank} = \%lid;
    }

    my $r = mpi_go_deadlock_detect( $carg, \%coll_data );
    print $r;
    return;
}

sub read_maps {
    my ($pid) = @_;

    my @regions;
    foreach my $rgn ( slurp_file("/proc/$pid/maps") ) {
        my ( $area, $perm, $offset, $time, $inode, $file ) =
          split $SPACE, $rgn;
        my ( $start, $end ) = split "-", $area;

        my %region = (
            start => _hex("0x$start"),
            end   => _hex("0x$end"),
            perm  => $perm
        );

        $region{file} = $file if ( defined $file and length $file );
        push @regions, \%region;
    }

    return \@regions;
}

sub describe_pointer {
    my ( $gdb, $ptr ) = @_;

    my $pval = _hex($ptr);

    if ( $ptr eq '0x0' ) {
        return pretify_variable('null pointer');
    }

    foreach my $rgn ( @{ $gdb->{maps} } ) {
        if ( $pval >= $rgn->{start} and $pval <= $rgn->{end} ) {
            if ( defined $rgn->{file} ) {
                return pretify_variable(
                    "valid pointer perm=$rgn->{perm} ($rgn->{file})");
            } else {
                return pretify_variable("valid pointer perm=$rgn->{perm}");
            }
        }
    }
    return pretify_variable("$ptr (Invalid pointer)");
}

sub expand_var_hash {
    my ( $gdb, $lookup, $type, $addr ) = @_;
    if ( defined $lookup->{$type} ) {
        my $fm = $lookup->{$type};
        my $r = gdb_read_pointer( $gdb, $addr );
        $fm =~ s{%s}{$r};
        my $str_loc = gdb_var_addr( $gdb, $fm );
        if ( defined $str_loc ) {
            my $value = gdb_string( $gdb, 1024, $str_loc );
            if ( defined $value and length $value > 0 ) {
                return $value;
            }
        }
    }
    return;
}

sub custom_expand_var {
    my ( $gdb, $type, $name, $addr ) = @_;

    my %lookup_generic = ( 'char *' => "%s", );

    my %lookup_open = (
        'MPI_Comm'              => "((struct ompi_communicator_t *)%s).c_name",
        'MPI_Datatype'          => "((struct ompi_datatype_t *)%s).name",
        'MPI_Op'                => "((struct ompi_op_t *)%s).o_name",
        'ompi_communicator_t *' => "((struct ompi_communicator_t *)%s).c_name",
        'ompi_datatype_t *'     => "((struct ompi_datatype_t *)%s).name",
        'ompi_op_t *'           => "((struct ompi_op_t *)%s).o_name",
    );

    my %lookup_mpich2 = ( 'MPID_Comm *' => "((MPID_Comm *)%s).name", );

    my $var_desc = expand_var_hash( $gdb, \%lookup_generic, $type, $addr );
    return $var_desc if defined $var_desc;

    if ( defined $gdb->{runtime}{ompi} ) {
        $var_desc = expand_var_hash( $gdb, \%lookup_open, $type, $addr );
        return $var_desc if defined $var_desc;
    }

    if ( defined $gdb->{runtime}{mpich2} ) {
        $var_desc = expand_var_hash( $gdb, \%lookup_mpich2, $type, $addr );
        return $var_desc if defined $var_desc;
    }

    return;

}

sub gdb_expand_var {
    my ( $gdb, $arg ) = @_;

    # If you try and read a value which claims to be optimised away it
    # will return a value of zero, hard to know how to handle this but
    # not reporting it is probably the better of the two options.
    return pretify_variable('value optimized out')
      if ( defined $arg->{value} and $arg->{value} eq '<value optimized out>' );

    # Char * pointers are already correctly handled by gdb with set
    # pointer off enabled so don't try and modify these.
    return if ( $arg->{type} eq 'char *' );

    my $type = $arg->{type};
    my $addr = gdb_var_addr( $gdb, $arg->{name} );

    # Strip out and struct from the given type as it makes no
    # difference to the code if it's there or not.
    if ( substr( $type, 0, 7 ) eq 'struct ' ) {
        $type = substr $type, 7;
    }

    # Check for custom types, these are individual, often run-time
    # specific types that have handlers defined for them.  Basically
    # we know what the type is so go in and extract the information we
    # need.
    if ( defined $addr ) {
        my $expanded_var =
          custom_expand_var( $gdb, $type, $arg->{name}, $addr );
        if ( defined $expanded_var ) {
            return pretify_variable($expanded_var);
        }
    }

    # If it's a pointer, firstly load it's value, then try and print
    # it, if that fails then describe it.
    if ( defined $type and substr( $type, -2 ) eq ' *' ) {
        my $pointer;

        if ( defined $addr ) {
            $pointer = gdb_read_pointer( $gdb, $addr );
        }

        if ( not defined $pointer ) {
            $pointer = gdb_read_value_addr( $gdb, $arg->{name} );
        }

        if ( $pointer eq '0x0' ) {
            return pretify_variable('null pointer');
        }

        # Try and print the contents of the pointer, this works fine
        # for types like 'double *' but produces lots of data for more
        # complex types, check the length here and reject it now if we
        # need to so that describe_later() can work.
        if ( defined $addr and $type ne 'void *' ) {
            my $value = gdb_read_value( $gdb, "*($type)$addr" );
            if ( defined $value and length $value <= 70 ) {
                return $value;
            }
        }

        # Describe the pointer by where it points to and what perms it
        # has.
        return describe_pointer( $gdb, $pointer );
    }

    # Some variables don't show up a value from list-locals,
    # __FUNCION__ and array pointers are two examples.  For vars where
    # the value isn't given automatically read the value of them
    # directly.
    return if defined $arg->{value};
    my $value = gdb_read_value( $gdb, $arg->{name} );
    if ( defined $value ) {
        return $value;
    }

    return;
}

sub gdb_expand_vars {
    my ( $gdb, $frame, $type ) = @_;

    foreach my $arg ( @{ $frame->{$type} } ) {
        my $value = gdb_expand_var( $gdb, $arg );
        if ( defined $value ) {
            $arg->{value} = $value;
        }
    }

    return;
}

sub gdb_dump_frames {
    my ( $gdb, $detail ) = @_;
    my %result = gdb_n_send( $gdb, '-stack-list-frames' );
    my $data = gdb_parse_reason( $result{reason}, 'frame' );
    if ( not defined $data->{stack} ) {
        return ( { error => $data->{msg} || 'unknown error' } );
    }
    if ( defined $detail ) {
        foreach my $frame ( @{ $data->{stack} } ) {
            gdb_send( $gdb, "-stack-select-frame $frame->{level}" );

            my %r = gdb_send_addr( $gdb,
                "-stack-list-arguments 2 $frame->{level} $frame->{level}" );
            my $args = gdb_parse_reason( $r{reason} );

            if ( defined $args->{'stack-args'}[0]{frame}{args} ) {
                my @names = @{ $args->{'stack-args'}[0]{frame}{args} };
                @{ $frame->{params} } = @names;

                gdb_expand_vars( $gdb, $frame, 'params' );
            }

            my %s = gdb_n_send( $gdb, '-stack-list-locals --simple-values' );
            if ( $s{status} eq 'done' ) {
                my $flocals = gdb_parse_reason( $s{reason} );
                if ( defined $flocals->{locals} ) {
                    $frame->{locals} = $flocals->{locals};
                    gdb_expand_vars( $gdb, $frame, 'locals' );
                }
            }
        }
    }
    return @{ $data->{stack} };
}

sub gdb_dump_frames_per_thread {
    my ( $gdb, $detail, $thread_id ) = @_;
    my @th = ();
    my %result = gdb_n_send( $gdb, '-thread-list-ids' );
    if ( $result{status} ne 'done' ) {
        return;
    }
    my $data = gdb_parse_reason( $result{reason}, 'thread-ids' );
    if ( not defined $data->{'thread-ids'} ) {
        return;
    }

    # I honestly don't know what this code is here for, presumably at
    # some point in the past I've experienced a version of gdb which
    # reports the number-of-threads as zero!  No harm in leaving the
    # code here however.
    if ( $data->{'number-of-threads'} == 0 ) {
        my %t = ( id => 0 );
        @{ $t{frames} } = gdb_dump_frames( $gdb, $detail );
        push @th, \%t;
        return @th;
    }

    # Solaris has a extra "LWP" thread for every process which gdb reports
    # as being a duplicate of the main thread for a process.  Skip loading
    # this thread as it doesn't add anything to the output rather muddies
    # the water somewhat.  What I don't know is if the LWP reports to be a
    # duplicate of the first thread, I suspect it's the last one so drop
    # the last thread from the list reported by gdb.  This needs verifying
    # and possibly changing by somebody who has access to both solaris and
    # multi threaded processes.
    if ($running_on_solaris) {
        pop @{ $data->{'thread-ids'} };
    }
    foreach my $thread ( @{ $data->{'thread-ids'} } ) {
        my $id = $thread->{'thread-id'};
        if ( defined $thread_id ) {
            next unless $thread_id eq $id;
        }
        my %t = ( id => $id );
        gdb_send( $gdb, "-thread-select $id" );
        @{ $t{frames} } = gdb_dump_frames( $gdb, $detail );
        push @th, \%t;
    }
    return @th;
}

# I've not run this function for a while, it would probably benefit form
# re-writing from scratch.
sub stack_from_core {
    my $exe  = shift;
    my $core = shift;

    my $gdb = gdb_start( $exe, $core );

    my %e = gdb_n_next_result($gdb);

    my $r = $e{raw};

    $r =~ s/\\n/\n/g;
    $r =~ s/\\"/\"/g;    #"
    $r =~ s/\\\\/\\/g;

    my @r = split "\n", $r;

    foreach my $l ( split "\n", $r ) {
        next if ( $l =~ m/^done/ );
        next if ( $l =~ m/^Loaded/ );
        next if ( $l =~ m/^Reading/ );
        next if ( $l =~ m/^Using/ );
        print "$l\n";
    }

    # Send a invalid command so the wait_for_prompt in dump_frames... can
    # work.  Should probably do this in gdb_start() and return the output
    # somehow.
    my $handle = $gdb->{wtr};
    print {$handle} "\n";

    print "\n";

    my @threads;
    if ( $conf{stack_shows_params} ) {
        @threads = gdb_dump_frames_per_thread( $gdb, 1 );
    } else {
        @threads = gdb_dump_frames_per_thread($gdb);
    }

    # This code is (almost 100%) lifted from stack_trace_from_pids, could
    # probably factor it out into it's own helper function.
    foreach my $thread ( sort { $a->{id} <=> $b->{id} } @threads ) {
        my @frames = @{ $thread->{frames} };

        print "ThreadId: $thread->{id}\n" if ( @threads != 1 );

        for ( my $i = $#frames ; $i >= 0 ; $i-- ) {
            my $frame = $frames[$i];

            print "ERROR: $$frame{error}\n"
              if exists $$frame{error};

            next unless exists $$frame{level};
            next unless exists $$frame{func};

            if ( $conf{stack_shows_params} ) {
                my @a;
                foreach my $arg ( @{ $frame->{params} } ) {
                    if ( defined $frame->{vals}{$arg} ) {
                        push @a, "$arg = $frame->{vals}{$arg}";
                    } else {
                        push @a, "$arg = ??";
                    }
                }
                my $a = join q{, }, @a;
                my $file = $frame->{file} || "?";
                my $line = $frame->{line} || "?";
                print "$frame->{func}($a) at $file:$line\n";

                if ( $conf{stack_shows_locals} ) {
                    foreach my $arg ( @{ $frame->{locals} } ) {
                        if ( defined $frame->{vals}{$arg} ) {
                            print "  $arg = $frame->{vals}{$arg}\n";
                        } else {
                            print "  $arg = ??\n";
                        }
                    }
                }
            } else {
                printf( ( $$frame{func} || '?' ) 
                    . '() at '
                      . ( $$frame{file} || '?' ) . ':'
                      . ( $$frame{line} || '?' )
                      . "\n" );
            }
        }
    }

    gdb_quit($gdb);
    return;
}

sub run_ptrack_cmd {
    my ( $vp, $pid, $cmd ) = @_;

    my $lines = 0;

    send_cont_signal($pid);
    open my $CMD, '-|', "$cmd 2>/dev/null"
      or p_die( $vp, "cant start command $cmd" );
    while (<$CMD>) {
        chomp $_;
        output( $vp, $_ );
        $lines++;
    }
    send_cont_signal($pid);
    close $CMD;
    return $lines;
}

sub run_command {
    my ( $vp, $cmd ) = @_;
    open my $CMDS, '-|', "$cmd" or p_die( $vp, 'cant fork subcommand' );
    while (<$CMDS>) {
        chomp $_;
        output( $vp, $_ );
    }
    close $CMDS;
    return;
}

sub get_remote_env_bygdb {
    my $pid = shift;

    my %env;
    my ( $fh, $filetmp ) = tempfile("/tmp/padb.XXXXXX");
    print $fh 'set pagination off';
    print $fh "\n";
    print $fh 'set $envp = *(char ***) &__environ';
    print $fh "\n";
    print $fh 'while (*$envp != 0)';
    print $fh "\n";
    print $fh 'printf "%s\n",*$envp';
    print $fh "\n";
    print $fh 'set $envp = $envp + 1';
    print $fh "\n";
    print $fh 'end';
    print $fh "\n";
    close $fh;
    my $psg = {
        rdr => "",
        wtr => "",
        err => "",
    };
    my $ret_pid;
    my $cmd = "gdb -nx -batch -x $filetmp -pid=";
    $cmd .= $pid;
    $ret_pid = open3( $psg->{wtr}, $psg->{rdr}, $psg->{err}, $cmd );
    my $handle = $psg->{rdr};

    while (<$handle>) {
        next if (/^\[/);
        next if (/^Using\s+/i);
        next if (/^0x/i);
        if (/=/) {
            chomp;
            my @f   = split "=";
            my $key = $f[0];
            if ( $f[1] !~ /^\(\)/ ) {    # not register function
                shift @f;
                $env{$key} = join( "=", @f );
            }
        }
    }
    close $psg->{wtr};
    close $psg->{rdr};
    close $psg->{err};
    waitpid( $ret_pid, 0 );
    unlink($filetmp);
    return %env;
}

sub get_remote_env {
    my $pid = shift;

    my %env;

    local $/ = "\0";
    open my $FD, '<', "/proc/$pid/environ" or return;
    while (<$FD>) {
        chomp;
        my @f   = split "=";
        my $key = $f[0];
        shift @f;
        $env{$key} = join q{=}, @f;
    }
    close $FD;
    return %env;
}

# Load the data about a given RMS job id, return a array of hashes
sub load_rms_procs {
    my $job = shift;

    # This is actually perfectly legitimate, it's because you can do for
    # example allocate -N4 prun -N2 <app>.  Because of the way prun -T
    # works (across a resource) not having a pids file isn't always a bad
    # thing.
    #
    # Of course it could mean that whatever jobs were supposed to be
    # running on this node aren't.
    open my $PIDFILE, '<', "/proc/rms/programs/$job/pids" or return;

    my @procs;

    while (<$PIDFILE>) {
        my ( $pid, $vp ) = split(' ');
        my %process;
        $process{pid} = $pid;
        if ( defined $vp and $vp != -1 ) {

            # Modern versions of RMS do the pid to vp translation for us
            # but report all unknown pids as -1.  Unknown in this case
            # means the rmsloaders and any processes which haven't called
            # elan_baseInit()
            $process{vp} = $vp;
        }
        push @procs, \%process;
    }
    close $PIDFILE;
    return @procs;
}

sub show_task_file {
    my ( $vp, $file, $prefix ) = @_;
    return unless ( -f $file );
    my @all = slurp_file($file);
    foreach my $l (@all) {
        chomp $l;
        if ( defined $prefix ) {
            proc_output( $vp, $prefix, $l );
        } else {
            if ( $l =~ m{\A(\w+):\s*(.+)}x ) {
                proc_output( $vp, $1, $2 );
            }
        }
    }
    return;
}

sub show_task_stat_file {
    my ( $vp, $file ) = @_;
    my @stat_names =
      qw(pid comm state ppid pgrp session tty_nr tpgid flags minflt
      cminflt majflt cmajflt utime stime cutime cstime priority nice
      num_threads itrealvalue starttime vsize rss rlim startcode endcode
      startstack kstkesp kstkeip signal blocked sigignore sigcatch wchan
      nswap cnswap exit_signal processor rt_ptiority policy
      delayacct_blkio_ticks guest_time cguest_time);
    return unless ( -f $file );

    foreach my $l ( slurp_file($file) ) {
        chomp $l;
        my @stats = split $SPACE, $l;
        foreach my $stat ( 0 .. $#stats ) {
            proc_output( $vp, "stat.$stat_names[$stat]", $stats[$stat] );
        }

    }
    return;
}

sub show_task_cmdline {
    my ( $vp, $dir ) = @_;

    local $/ = "\0";
    open my $FD, '<', "$dir/cmdline" or return;
    my @args;
    while (<$FD>) {
        chomp;
        push @args, $_;
    }
    proc_output( $vp, 'cmdline', "@args" );
    return;
}

sub show_task_dir {
    my ( $carg, $vp, $pid, $dir ) = @_;

    if ( $carg->{proc_shows_proc} ) {
        my $exe = readlink "$dir/exe";
        if ( defined $exe ) {
            proc_output( $vp, 'exe', $exe );
        }

        show_task_file( $vp, "$dir/status" );
        show_task_file( $vp, "$dir/wchan", 'wchan' );
        show_task_file( $vp, "$dir/stat", 'stat' );

        show_task_cmdline( $vp, $dir );

        if ( $carg->{proc_shows_stat} ) {
            show_task_stat_file( $vp, "$dir/stat" );
        }

        if ( -f "$dir/maps" ) {
            my %totals;
            foreach my $rgn ( slurp_file("$dir/maps") ) {
                my ( $area, $perm, $offset, $time, $inode, $file ) =
                  split $SPACE, $rgn;
                if ( $file =~ '/dev/elan4/sdram(\d+)' ) {
                    my $rail = $1;
                    my ( $start, $end ) = split "-", $area;
                    my $s     = _hex("0x$start");
                    my $e     = _hex("0x$end");
                    my $delta = $e - $s;
                    if ( defined $totals{$rail} ) {
                        $totals{$rail} += $delta;
                    } else {
                        $totals{$rail} = $delta;
                    }
                }
            }
            foreach my $rail ( sort keys %totals ) {
                my $total = $totals{$rail} / 1024;
                proc_output( $vp, "sdram$rail", "$total kb" );
            }
        }
    }

    if ( $carg->{proc_shows_fds} ) {
        my @fds = slurp_dir("$dir/fd");
        my @all_fddata;
        foreach my $fd (@fds) {
            next if ( $fd eq '.' );
            next if ( $fd eq '..' );
            my $target = readlink "$dir/fd/$fd";

            my %fdhash;
            $fdhash{target} = $target;
            $fdhash{fd}     = $fd;

            if ( -f "$dir/fdinfo/$fd" ) {
                foreach my $fdi ( slurp_file("$dir/fdinfo/$fd") ) {
                    if ( $fdi =~ m{\A(\w+):\s*(\d+)\Z}x ) {
                        $fdhash{$1} = $2;
                    }
                }
            }
            push @all_fddata, \%fdhash;
        }
        foreach my $fd (@all_fddata) {
            if ( defined $fd->{pos} ) {
                proc_output( $vp, "fd$fd->{fd}",
                    "$fd->{target} \($fd->{pos} $fd->{flags}\)" );
            } else {
                proc_output( $vp, "fd$fd->{fd}", $fd->{target} );
            }
        }
    }
    if ( $carg->{proc_shows_maps} ) {
        show_task_file( $vp, "$dir/maps", 'maps' );
    }
    return;
}

# Convert the first line of /proc/stat to elapsed jiffies.
sub string_to_jiffies {
    my ($ps) = @_;

    my @usecc = split $SPACE, $ps;

    my $jiffies = 0;

    # Remove the "cpu" prefix.
    shift @usecc;
    foreach my $usecv (@usecc) {
        $jiffies += $usecv;
    }
    return $jiffies;
}

sub add_and_divide_jiffies {
    my ( $pre, $post ) = @_;

    my $jiffies;

    my @pre = split $SPACE, $pre;

    return ( ( string_to_jiffies($pre) + string_to_jiffies($post) ) / 2 );
}

sub pcpu_user {
    my ( $cpucount, $elapsed, $start, $end ) = @_;
    my @pre  = split $SPACE, $start;
    my @post = split $SPACE, $end;
    my $jused = $post[13] - $pre[13];
    my $used  = ( $jused / $elapsed ) * $cpucount * 100;
    return sprintf '%d', $used;
}

sub pcpu_sys {
    my ( $cpucount, $elapsed, $start, $end ) = @_;
    my @pre  = split $SPACE, $start;
    my @post = split $SPACE, $end;
    my $jused = $post[14] - $pre[14];
    my $used  = ( $jused / $elapsed ) * $cpucount * 100;
    return sprintf '%d', $used;
}

sub pcpu_total {
    my ( $cpucount, $elapsed, $start, $end ) = @_;
    my @pre  = split $SPACE, $start;
    my @post = split $SPACE, $end;
    my $jused = $post[13] - $pre[13] + $post[14] - $pre[14];
    my $used  = ( $jused / $elapsed ) * $cpucount * 100;
    return sprintf '%d', $used;
}

my %proc_keys;

sub proc_output {
    my ( $vp, $key, $value ) = @_;
    if ( $inner_conf{mode} eq 'proc_summary' ) {
        if ( defined $proc_keys{ lc $key } ) {
            target_key_pair( $vp, lc $key, $value );
        }
    } else {
        output( $vp, "$key: $value" );
    }
    return;
}

sub show_proc_all {
    my ( $carg, $list ) = @_;

    %proc_keys = ();

    if ( defined $carg->{proc_format} ) {
        my @columns = split $COMMA, $carg->{proc_format};
        foreach my $column (@columns) {
            my ( $name, $desc ) = split $EQUALS, $column;
            $proc_keys{ lc $name } = 1;
        }
    }

    my @all;

    my $jiffies_start;
    my $load_avg;
    my $SFD;
    if ( $carg->{proc_shows_proc} ) {
        foreach my $proc ( @{$list} ) {
            my $pid = $proc->{pid};
            my $handle;
            if ( open $handle, '<', "/proc/$pid/stat" ) {
                $proc->{handle} = $handle;
            }
        }

        open $SFD, '<', '/proc/stat';

        # Begin critical path.
        my $stat = <$SFD>;

        foreach my $proc ( @{$list} ) {
            next unless ( defined $proc->{handle} );
            my $h = $proc->{handle};
            $proc->{stat_start} = <$h>;
            seek $proc->{handle}, 0, 0;
        }

        seek $SFD, 0, 0;
        my $stat2 = <$SFD>;

        # End critical path.

        $jiffies_start = add_and_divide_jiffies( $stat, $stat2 );
        open my $LFD, '<', '/proc/loadavg';
        $load_avg = <$LFD>;
        close $LFD;
    }

    foreach my $proc ( @{$list} ) {
        my $vp  = $proc->{vp};
        my $pid = $proc->{pid};
        show_proc( $carg, $vp, $pid );
    }

    if ( $carg->{proc_shows_proc} ) {

        # We only need to sleep if we are sampling the jiffy count for
        # this process, otherwise there is no need and it shows padb down
        # considerably.
        my $should_sleep = 1;
        if ( defined $carg->{proc_format} ) {
            $should_sleep = 0;
            if (   defined $proc_keys{pcpu}
                or defined $proc_keys{pucpu}
                or defined $proc_keys{pscpu} )
            {
                $should_sleep = 1;
            }
        }

        if ($should_sleep) {
            sleep 1;
        }

        seek $SFD, 0, 0;

        # Begin critical path.
        my $stat = <$SFD>;

        foreach my $proc ( @{$list} ) {
            next unless ( defined $proc->{handle} );
            my $h = $proc->{handle};
            $proc->{stat_end} = <$h>;
            close $proc->{handle};

            # Set handle to undef to prepare for future iterations of the
            # loop if being run in watch mode.  Without this handle remains
            # set but refers to the closed fd which causes errors next time
            # round the loop if the open fails.
            $proc->{handle} = undef;
        }

        seek $SFD, 0, 0;
        my $stat2 = <$SFD>;

        # End critical path.

        my $cpucount = 0;
        while (<$SFD>) {
            if ( $_ =~ m{\Acpu\d}x ) {
                $cpucount++;
            }
        }
        close $SFD;

        my $jiffies_end = add_and_divide_jiffies( $stat, $stat2 );

        my $elapsed = $jiffies_end - $jiffies_start;

        my ( $l1, $l5, $l15 ) = split $SPACE, $load_avg;

        foreach my $proc ( @{$list} ) {
            my $vp = $proc->{vp};

            proc_output( $vp, 'load1',  $l1 );
            proc_output( $vp, 'load5',  $l15 );
            proc_output( $vp, 'load15', $l15 );

            next unless defined $proc->{stat_end};

            if ( $elapsed > 0 ) {
                proc_output(
                    $vp, 'pcpu',
                    pcpu_total(
                        $cpucount,           $elapsed,
                        $proc->{stat_start}, $proc->{stat_end}
                    )
                );
                proc_output(
                    $vp, 'pucpu',
                    pcpu_user(
                        $cpucount,           $elapsed,
                        $proc->{stat_start}, $proc->{stat_end}
                    )
                );
                proc_output(
                    $vp, 'pscpu',
                    pcpu_sys(
                        $cpucount,           $elapsed,
                        $proc->{stat_start}, $proc->{stat_end}
                    )
                );
            }
        }
    }

    return;
}

sub show_proc {
    my ( $carg, $vp, $pid ) = @_;

    if ( $carg->{proc_shows_proc} ) {
        proc_output( $vp, 'hostname', $inner_conf{hostname} );
    }

    if ( -d "/proc/$pid/task" and $carg->{proc_shows_proc} ) {

        # 2.6 kernel. (ntpl)
        my @tasks = slurp_dir("/proc/$pid/task");
        foreach my $task (@tasks) {
            next if ( $task eq '.' );
            next if ( $task eq '..' );
            show_task_dir( $carg, $vp, $pid, "/proc/$pid/task/$task" );
            if ( defined $carg->{proc_format} ) {
                last;
            }
        }

        # We have to deduct 2 here to account for . and ..
        my $threads = @tasks - 2;
        proc_output( $vp, 'threads', $threads );
    } else {
        show_task_dir( $carg, $vp, $pid, "/proc/$pid" );
    }
    return;
}

sub show_stack_vars {
    my ( $vp, $frame, $type ) = @_;
    return unless defined $frame->{$type};
    return if ( @{ $frame->{$type} } == 0 );

    _format_local_vars( $frame->{$type} );
    output( $vp, "  $type:" );
    foreach my $arg ( @{ $frame->{$type} } ) {
        my $value = ( defined $arg->{value} ? $arg->{value} : '??' );
        output( $vp, "    $arg->{type_name} = $value" );
    }
    return;
}

# Attach to all local processes in preperation for calling the mode
# callback.  This function, along with the corresponding one below it
# handles persistent attachment between modes: modes specify if they
# want gdb handles or not, if they do then this function attaches for
# it, if they don't and gdb is attached this function will detach.
sub global_attach {
    my ( $mode, $procs ) = @_;

    if ( not $allfns{$mode}{needs_gdb} ) {
        global_detach($procs);
        return;
    }

    foreach my $proc ( @{$procs} ) {
        my $vp  = $proc->{vp};
        my $pid = $proc->{pid};

        next if defined $proc->{gdb_handle};

        $proc->{gdb_tmp} = gdb_start();
        gdb_attach_async_start( $proc->{gdb_tmp}, $pid );
    }

    foreach my $proc ( @{$procs} ) {

        next if defined $proc->{gdb_handle};

        my $vp  = $proc->{vp};
        my $pid = $proc->{pid};
        my $gdb = $proc->{gdb_tmp};

        delete $proc->{gdb_tmp};

        if ( gdb_attach_async_end( $gdb, $pid ) ) {
            $proc->{gdb_handle} = $gdb;
        } else {
            if ( defined $gdb->{error} ) {
                target_error( $vp, $gdb->{error} );
            } else {
                target_error( $vp, 'Failed to attach to process' );
            }
            gdb_quit($gdb);
        }
    }

    return;
}

# Detach from all local processes, this function is called from both
# global_attach and also when padb is exiting.
sub global_detach {
    my ($procs) = @_;

    foreach my $proc ( @{$procs} ) {
        if ( defined $proc->{gdb_handle} ) {
            gdb_detach( $proc->{gdb_handle} );
            delete $proc->{gdb_handle};
        }
    }
    return;
}

# Try and be clever here, attach to each and every process on this node
# first, then go back and query them each in turn, should mean that some
# processes are not spinning whilst gdb is doing it's thing which will mean
# a quicker runtime but also that the resulting stack traces will have less
# artifacts because running processes bunch up behind the non-running ones.

# We used to reguarly get garbage from gdb so there is a test here for
# finding main, with code to detach and try again if we don't.  This served
# us well on ia64 where gdb isn't very good however it as on most machines
# gdb gives you results below main (__libc_start_main()) this test fails
# which causes padb to loop a number of times for each process on a node.
# We still sometimes get garbage (due to hand-rolled memcpy()) so leave the
# loop in but don't sleep every iteration.  This could be handled better by
# checking for the presence of one of the stack_strip_below functions in
# the stack trace.
sub stack_trace_from_pid {
    my ( $carg, $proc ) = @_;

    my @all;

    my %above;
    my %below;

    if ( $carg->{strip_above_wait} ) {
        foreach ( split $COMMA, $carg->{stack_strip_above} ) {
            $above{$_} = 1;
        }
    }

    # Always do this here as we test for strip_below_main later and we want
    # to be able to test stack traces for the presence of main even if we
    # aren't stripping them.
    foreach ( split $COMMA, $carg->{stack_strip_below} ) {
        $below{$_} = 1;
    }

    return unless defined $proc->{gdb_handle};

    my $tries = 0;

    my @threads;

    my $vp  = $proc->{vp};
    my $pid = $proc->{pid};
    my $gdb = $proc->{gdb_handle};

    my $ok;
    do {

        # The first time round the loop we will have a gdb handle from
        # above, only re-attach if we have already failed on the first
        # try and are here a second time.
        if ( $tries > 0 ) {
            gdb_detach($gdb);
            gdb_quit($gdb);
            delete $proc->{gdb_handle};
            send_cont_signal($pid);
            $gdb = gdb_start();
            if ( gdb_attach( $gdb, $pid ) ) {
                $proc->{gdb_attach} = $gdb;
            } else {
                if ( defined $gdb->{error} ) {
                    target_error( $vp, $gdb->{error} );
                } else {
                    target_error( $vp, 'Failed to attach to process' );
                }
                gdb_quit($gdb);
                return;
            }
        }

        if (   $carg->{stack_shows_params}
            or $carg->{stack_shows_locals} )
        {
            @threads =
              gdb_dump_frames_per_thread( $gdb, 1, $carg->{thread_id} );
        } else {
            @threads =
              gdb_dump_frames_per_thread( $gdb, undef, $carg->{thread_id} );
        }

        if ( defined $threads[0]->{frames} ) {
            my @frames = @{ $threads[0]->{frames} };
            foreach my $frame (@frames) {
                if (    defined $frame->{func}
                    and defined $below{ $frame->{func} } )
                {
                    $ok = 1;
                    last;
                }
            }
        }

      } while ( ( not $ok )
        and ( $tries++ < $carg->{gdb_retry_count} ) );

    if ( not defined $threads[0]{id} ) {
        if ( $carg->{thread_id} ) {
            target_error( $vp,
                'Could not extract stack trace for specified thread' );
        } else {
            target_error( $vp,
                'Could not extract stack trace from application' );
        }
        return;
    }

    if ( defined $threads[0]{error} ) {
        target_error( $vp, $threads[0]{error} );
        return;
    }

    foreach my $thread ( sort { $a->{id} <=> $b->{id} } @threads ) {
        next unless defined $thread->{frames};
        my @frames = @{ $thread->{frames} };

        output( $vp, "ThreadId: $thread->{id}" ) if ( @threads != 1 );
        if ( $carg->{out_format} eq 'tree' ) {
            target_key_pair( $vp, 'thread_id', $thread->{id} );
        }

        my $strip_below;

        # Find a function to strip above.  Only actually enable this if
        # there is a function present which we are targeting or else no
        # output will be generated!  Do this in reverse order so we
        # strip as much as possible from the stack trace.
        if ( $carg->{strip_below_main} ) {
            foreach my $frame ( reverse @frames ) {
                next unless exists $frame->{func};
                if ( defined $below{ $frame->{func} } ) {
                    $strip_below = $frame->{func};
                }
            }
        }

        my @fl = $EMPTY_STRING;
        foreach my $frame ( reverse @frames ) {

            target_error( $vp, "error from gdb: $frame->{error}" )
              if exists $frame->{error};

            next unless exists $frame->{level};
            next unless exists $frame->{func};

            # This seemingly always gets set by gdb even if it is
            # sometimes set to '??'
            my $function = $frame->{func};

            next if ( defined $strip_below and $strip_below ne $function );

            $strip_below = undef;

            my $l = sprintf "%s() at %s:%s",
              $function,
              ( $frame->{file} || '?' ),
              ( $frame->{line} || '?' );

            if ( $carg->{out_format} eq 'tree' ) {

                output_namespace( $vp, $thread->{id}, $l );
                push @fl, $l;
                my $fl = join( ",", @fl );
                $fl = "$thread->{id}|$fl";
                if ( $carg->{stack_shows_locals} ) {
                    my @local_names;
                    foreach my $loc ( @{ $frame->{locals} } ) {
                        push @local_names, $loc->{name};
                        target_key_pair( $vp, "$l|var_type|$loc->{name}",
                            $loc->{type} );

                        if ( length $loc->{value} > 70 ) {
                            target_key_pair(
                                $vp,
                                $fl . '|var|' . $loc->{name},
                                pretify_variable('value too long to display')
                            );
                        } else {
                            target_key_pair( $vp, $fl . '|var|' . $loc->{name},
                                $loc->{value} );
                        }
                    }
                    if ( @local_names > 0 ) {
                        target_key_pair( $vp, "$l|locals",
                            join( q{,}, sort @local_names ) );
                    }
                }
                if ( $carg->{stack_shows_params} ) {

                    my @param_names;
                    foreach my $par ( @{ $frame->{params} } ) {
                        push @param_names, $par->{name};
                        target_key_pair( $vp, "$l|var_type|$par->{name}",
                            $par->{type} );
                        if ( length $par->{value} > 70 ) {
                            target_key_pair(
                                $vp,
                                $fl . '|var|' . $par->{name},
                                pretify_variable('value too long to display')
                            );
                        } else {
                            target_key_pair( $vp, $fl . '|var|' . $par->{name},
                                $par->{value} );
                        }
                    }
                    if ( @param_names > 0 ) {
                        target_key_pair( $vp, "$l|params",
                            join( q{,}, @param_names ) );
                    }
                }
            } else {
                output( $vp, $l );

                if ( $carg->{stack_shows_params} ) {
                    show_stack_vars( $vp, $frame, 'params' );
                }
                if ( $carg->{stack_shows_locals} ) {
                    show_stack_vars( $vp, $frame, 'locals' );
                }
            }

            # Strip below this function if we need to.
            if ( defined $above{$function} ) {
                last;
            }
        }
    }
    return;
}

sub thread_list_from_pid {
    my ( $carg, $proc ) = @_;

    return unless defined $proc->{gdb_handle};

    my $gdb = $proc->{gdb_handle};

    my %result = gdb_n_send( $gdb, '-thread-list-ids' );
    if ( $result{status} ne 'done' ) {
        return;
    }
    my $data = gdb_parse_reason( $result{reason}, 'thread-ids' );
    if ( not defined $data->{'thread-ids'} ) {
        return;
    }

    my @threads;
    foreach my $thread ( @{ $data->{'thread-ids'} } ) {
        my $id = $thread->{'thread-id'};
        push @threads, $id;
    }

    my $thread_list = join q{,}, sort { $a <=> $b } @threads;

    output( $proc->{vp}, $thread_list );
    return;
}

sub kill_proc {
    my ( $cargs, $vp, $pid ) = @_;
    my $signal = uc $cargs->{signal};
    kill $signal, $pid;
    return;
}

# Experimental, currently reports on what's on the node rather than
# what the specific process is attached to, hopefully this
# functionality will be added in the future.

# https://svn.open-mpi.org/trac/hwloc/ticket/21
sub lstopo {
    my ( $cargs, $vp, $pid ) = @_;

    if ( $cargs->{lstopo_show_warning} ) {
        target_error( $vp, "Reporting per node rather than per process" );
    }

    my $cmd = $cargs->{lstopo_command};

    $cmd =~ s{%p}{$pid}g;

    my @output = slurp_cmd($cmd);

    # Check the return code, if it's not found then there won't be any
    # output, if it was found but returned an error then do report the
    # output as it might be useful.
    my $rc = $?;
    if ( $rc != 0 ) {
        if ( not find_exe("lstopo") ) {
            target_error( $vp, "Error running lstopo: command not found" );
            return;
        } else {
            target_error( $vp, "Error running lstopo" );
        }
    }

    chomp @output;
    foreach my $line (@output) {
        output( $vp, $line );
    }
    return;
}

sub run_cmd_against_target {
    my ( $cargs, $rank, $pid ) = @_;

    my $cmd = $cargs->{command};

    $cmd =~ s{%p}{$pid}g;
    $cmd =~ s{%r}{$rank}g;

    my @output = slurp_cmd($cmd);
    chomp @output;
    foreach my $line (@output) {
        output( $rank, $line );
    }
    return;
}

sub ping_rank {
    my ( $cargs, $vp, $pid ) = @_;
    target_key_pair( $vp, 'ping', 'ACK' );
    output( $vp, 'ACK' );
    return;
}

sub qsnet_show_tport_queue {
    my ( $carg, $vp, $pid ) = @_;

    # If edb isn't installed (this isn't a Quadrics system) don't even try
    # and run edb as it'll fail, switch straight to minfo for the MPI
    # message queues instead.
    if ( !find_exe( $inner_conf{edb} ) ) {
        show_mpi_queue( $carg, $vp, $pid );
        return;
    }

    # Nobble the LD_LIBRARY_PATH to give etrace the best chance of working.
    my %remote_env = get_remote_env($pid);

    if ( defined $remote_env{LD_LIBRARY_PATH} ) {
        if ( defined $inner_conf{myld} ) {
            $ENV{LD_LIBRARY_PATH} =
              "$remote_env{LD_LIBRARY_PATH}:$inner_conf{myld}";
        } else {
            $ENV{LD_LIBRARY_PATH} = "$remote_env{LD_LIBRARY_PATH}";
        }
    }

    my $cmd = "$inner_conf{edb} --queues --pid=$pid";
    if ( defined $inner_conf{edbopt} ) {
        $cmd .= " $inner_conf{edbopt}";
    }
    my $lines = run_ptrack_cmd( $vp, $pid, $cmd );

    return if ( $lines != 0 );

    show_mpi_queue( $carg, $vp, $pid );
    return;
}

sub show_full_stack {
    my ( $vp, $pid, $file ) = @_;
    run_ptrack_cmd( $vp, $pid, "gdb -batch -x $file -p $pid" );
    return;
}

sub show_full_stacks {
    my ( $carg, $list ) = @_;

    my ( $fh, $file ) = tempfile('/tmp/padb.XXXXXXXX');
    print {$fh} "where full\n";
    print {$fh} "detach\n";
    close $fh;

    foreach my $proc ( @{$list} ) {
        show_full_stack( $proc->{vp}, $proc->{pid}, $file );
    }

    unlink $file;
    return;
}

sub set_debug {
    my ( $carg, $vp, $pid ) = @_;
    run_command( $vp,
        "edb --key $inner_conf{key} --debug=$carg->{dflag} --target-vp=$vp" );
    return;
}

my $mpi_watch_data = <<'EOF';
Barrier,b,elan_gsync,elan_hgsync,PMPI_Barrier,MPI_Barrier,shmem_barrier
Broadcast,B,elan_hbcast,elan_bcast,PMPI_Bcast,MPI_Bcast,shmem_bcast
AllGather,G,PMPI_Allgather,MPI_ALLgather
Gather,g,elan_gather,PMPI_Gather,MPI_Gather
AllReduce,R,PMPI_Allreduce,MPI_Allreduce
Reduce,r,elan_reduce,PMPI_Reduce,MPI_Reduce
alltoall,a,elan_alltoall,PMPI_Alltoall,MPI_Alltoall
alltoalls,A,elan_alltoalls
wait,w,elan_wait
EOF

# Load a file for use in MPI_Watch.
sub mpi_watch_load {
    my ($carg) = @_;

    # File is a csv file,
    # Name,c,function1,function2

    if ( defined $carg->{mpi_watch_file} ) {
        my %fns;

        my @d = slurp_file( $carg->{mpi_watch_file} );

        foreach my $mode (@d) {
            chomp $mode;
            my ( $name, $char, @fns ) = split $COMMA, $mode;
            $fns{names}{$name} = $char;
            foreach my $fn (@fns) {
                $fns{fns}{$fn} = $name;
            }
        }
        return \%fns;
    }

    my %fns;
    foreach my $mode ( split "\n", $mpi_watch_data ) {
        chomp $mode;
        my ( $name, $char, @fns ) = split $COMMA, $mode;
        $fns{names}{$name} = $char;
        foreach my $fn (@fns) {
            $fns{fns}{$fn} = $name;
        }
    }
    return \%fns;
}

# Legend:
#
# u - unexpected messages
# U - unexpected and other messages
# s - send messages only
# r - receive messages only
# m - send and receive messages
# . - no messages, consuming CPU
# - - sleeping
#
# * - error.

# Mpi watch mode.
#
# Report a single character for each process in the job, typically run with
# --watch enabled.
#
# Take care here when looping, $list is the same each iteration so if we
# store {gdb} in $list then it will be there next time round.  Fix this
# issue by populating a new array @all with every process which is present.
#
# This way if we are watching a job and the job goes away we can exit
# nicely rather than perpetually spew errors to the console.

sub mpi_watch_all {

    my ( $carg, $list ) = @_;
    my $fns = mpi_watch_load($carg);

    my @all;

    foreach my $proc ( @{$list} ) {
        my $vp  = $proc->{vp};
        my $pid = $proc->{pid};

        my %p;
        $p{vp}  = $vp;
        $p{pid} = $pid;

        # Load the status now before we attach with GDB, otherwise we'll
        # just see it as "T" (Stopped).
        my $m = find_from_status( $pid, 'State' );
        if ( not defined $m ) {
            target_error( $vp, 'No such process' );
            target_key_pair( $vp, 'state', '*' );
            next;
        }

        # Convert the string we get from find_from_status into a single
        # letter.
        $m = substr $m, 0, 1;

        if ( $m eq 'R' ) {
            $m = ',';
        } elsif ( $m eq 'S' ) {
            $m = '-';
        } else {
            $m = '*';
        }
        $p{state} = $m;

        my $gdb = gdb_start();
        if ( gdb_attach( $gdb, $pid ) ) {
            $p{gdb} = $gdb;
        } else {
            target_key_pair( $vp, 'state', $p{state} );
            if ( defined $gdb->{error} ) {
                target_error( $vp, $gdb->{error} );
            } else {
                target_error( $vp, 'Failed to attach to process' );
            }
        }
        push @all, \%p;
    }

    foreach my $proc (@all) {
        my $vp  = $proc->{vp};
        my $pid = $proc->{pid};

        my $gdb = $proc->{gdb};

        my @mq;
        my $sm   = 0;
        my $rm   = 0;
        my $um   = 0;
        my $good = '.';
        my $fnmode;

        @mq = fetch_mpi_queue_gdb( $carg, $vp, $pid, $gdb );

        if ( @mq == 0 ) {
            $good = ',';
        } else {
            foreach my $o (@mq) {
                if ( $o =~ /Operation (\d)/ ) {
                    my $type = $1;
                    $sm++ if ( $type == 0 );
                    $rm++ if ( $type == 1 );
                    $um++ if ( $type == 2 );
                }
            }
        }

        my $mt = ( grep { $_ } ( $sm, $rm, $um ) );
        if ( $mt != 0 ) {
            my $mode = '*';

            if ($um) {
                $mode = 'u';
                $mode = 'U' if ( $mt != 1 );
            } else {
                if ( $mt == 1 ) {
                    $mode = 's' if ($sm);
                    $mode = 'r' if ($rm);
                } else {
                    $mode = 'm';
                }
            }
            target_key_pair( $vp, 'state', $mode );
            next;
        }

        my @threads = gdb_dump_frames_per_thread($gdb);

        foreach my $thread ( sort { $a->{id} <=> $b->{id} } @threads ) {
            my @frames = @{ $thread->{frames} };
            foreach my $i ( reverse 0 .. $#frames ) {
                my $frame = $frames[$i];
                if ( defined $fns->{fns}{ $frame->{func} } ) {
                    $fnmode = $fns->{fns}{ $frame->{func} };
                    last;
                }
            }
        }

        if ( defined $fnmode ) {
            target_key_pair( $vp, 'state', $fns->{names}{$fnmode} );
            next;

        }

        # Fall through case.
        target_key_pair( $vp, 'state', $proc->{state} );
    }

    foreach my $proc (@all) {
        gdb_detach( $proc->{gdb} );
        gdb_quit( $proc->{gdb} );
    }
    return;
}

# To be called from the find_pids resource manager callback to say
# that the specified pid is the specified rank.  This process should
# be one spawned by the resource manager, if wrapper scripts are being
# used, say "mpirun -n 2 sh -c myapp" then this function should be
# called with the pid of 'sh', padb will then walk the process tree to
# find the more interesting child process and target that one.
sub register_target_process {
    my ( $rank, $pid ) = @_;

    $inner_conf{rmpids}{$pid}{rank} = $rank;
    return;
}

sub find_from_status {
    my ( $pid, $key ) = @_;

    if ($running_on_solaris) {
        my %key_lookup = (
            PPid  => 'ppid',
            Name  => 'comm',
            State => 's',
        );
        my $ps_key = $key_lookup{$key};
        my @res    = slurp_cmd("ps -o $ps_key= -p $pid");
        return if ( @res == 0 );
        chomp @res;
        my $proc = $res[0];
        $proc =~ s{\s*}{}g;
        return $proc;
    }

    foreach my $l ( slurp_file("/proc/$pid/status") ) {
        if ( $l =~ m{\A(\w+):\s*(.+)}x ) {
            if ( $1 eq $key ) { return $2; }
        }
    }
    return;
}

sub hash_from_status {
    my ($pid) = @_;
    my %status;
    my @pairs = slurp_file("/proc/$pid/status");
    return unless @pairs;
    foreach my $pair (@pairs) {
        if ( $pair =~ m{\A(\w+):\s*(.+)}x ) {
            $status{$1} = $2;
        }
    }
    return \%status;
}

sub is_resmgr_process {
    my $pid  = shift;
    my $name = find_from_status( $pid, 'Name' );
    my $mgrs = {
        rmsloader  => 1,
        slurmd     => 1,
        slurmstepd => 1,
        pbs_attach => 1,
        orted      => 1,
        mpirun     => 1,
    };
    return 1 if ( defined $mgrs->{$name} );
    return;
}

# Report the pids as reported by slurm, don't worry about tracing children or
# anything at this stage.
sub slurm_find_pids {
    my $jobid = shift;

    my @procs =
      slurp_cmd("scontrol listpids $jobid.$inner_conf{slurm_job_step}");

    my $found_target;

    foreach my $proc (@procs) {
        my ( $pid, $job, $step, undef, $global ) = split $SPACE, $proc;
        next if ( $global eq '-' );
        next unless ( $job eq $jobid );
        next unless ( $step == $inner_conf{slurm_job_step} );
        next if ( is_resmgr_process($pid) );
        register_target_process( $global, $pid );
        $found_target = 1;
    }
    return if $found_target;

    # Either we didn't find any processes on this node or we only
    # found processes named orted.  This could be for two reasons:
    # The job step might not be running on this node.
    # The job step might be a openmpi salloc/orterun combination.
    # If it's the latter then this node could either be the "head"
    # node where the mpirun is running or a "remote" node where the
    # job will be launched by orted.

    # Search the process list for processes which belong to this job
    # and either belong to this job step or don't state which job step
    # they belong to.
    foreach my $pid ( get_process_list($target_user) ) {

        # Skip over resource manager processes.
        next if ( is_resmgr_process($pid) );

        # Skip over ones which aren't direct descendants of a resource manager
        next unless is_parent_resmgr($pid);

        my $vp;
        my %env = get_remote_env($pid);

        next unless defined $env{SLURM_JOB_ID};
        next if ( $env{SLURM_JOB_ID} != $jobid );

        next unless defined $env{OMPI_COMM_WORLD_RANK};

        # If this is defined check it's correct, it might be missing though.
        if ( defined $env{SLURM_JOB_STEP} ) {
            next if $env{SLURM_JOB_STEP} != $inner_conf{slurm_job_step};
        }

        if ( defined $env{OMPI_COMM_WORLD_SIZE} ) {
            target_key_pair( $vp, "JOB_SIZE", $env{OMPI_COMM_WORLD_SIZE} );
        }

        register_target_process( $env{OMPI_COMM_WORLD_RANK}, $pid );
    }

    return;
}

#
# PBS support
#
# Find if parent is resource manager or not
# there's two cases:
# 1- parent immediately is pbs_attach, Ok it's true
# 2- no parent immediately is pbs_attach, so find until mpd & parent is root
#
# take care of no existing process or immediate parent is 1 or 0
# or proc itself is 'pbs_attach'
#
# reason to write this:
#    in new version of pbspro there's no more pbs_attach
#    and in this case padb fails to find any rank procs
#
# example:
#UID        PID  PPID  C STIME TTY   TIME     CMD
#thipa    23562     1  0 13:28 ?     00:00:00 python /opt/mpi/mpibull2-1.3.7-1.t/bin/mpd.py
#thipa    23563 23562  0 13:28 ?     00:00:00 python /opt/mpi/mpibull2-1.3.7-1.t/bin/mpd.py
#thipa    23564 23562  0 13:28 ?     00:00:00 python /opt/mpi/mpibull2-1.3.7-1.t/bin/mpd.py
#thipa    23565 23563  0 13:28 ?     00:00:00 ./pp_sndrcv_spbl
#thipa    23566 23564  0 13:28 ?     00:00:00 ./pp_sndrcv_spbl
#
#
sub is_parent_resmgr_pbs {
    my $input_pid = shift;
    my $result;
    my $parent_pid = find_from_status( $input_pid, 'PPid' );
    my $name_pid   = find_from_status( $input_pid, 'Name' );
    return
      if ( !defined($parent_pid)
        || $parent_pid == 1
        || $parent_pid == 0
        || $name_pid eq 'pbs_attach' );

    # loop to find its parents
    my $pid  = $parent_pid;
    my $loop = 0;
    for ( ; ; ) {

        # find PPid of this pid
        my $ppid = find_from_status( $pid, 'PPid' );
        my @name = slurp_file("/proc/$pid/cmdline");
        $loop++;
        my $line = $name[0];
        my @champs = split( /\0+/, $line );
        if ( $loop == 1 ) {
            if ( $champs[0] eq 'pbs_attach' ) {
                $result = 1;    # OK parent is resm
                last;
            }
            if ( $ppid == 1 || $ppid == 0 ) {
                last;           # parent is root last anyway
            }
        } elsif ( $loop >= 2 ) {
            if ( $ppid == 1 || $ppid == 0 ) {
                if ( defined( $champs[1] ) && $champs[1] =~ /mpd.py/ ) {
                    $result = 1    # OK parent is resm
                }
                last;              # parent is root last anyway
            }
        }
        $pid = $ppid;
    }
    return ($result);
}

#
sub pbs_find_pids {
    my $job = shift;

    if ( defined $inner_conf{pbs_server} ) {
        $job .= ".$inner_conf{pbs_server}";
    }

    my %vps;

    # Iterate over all processes for this user
    foreach my $pid ( get_process_list($target_user) ) {

        # Skip over resource manager processes.
        next if ( is_resmgr_process($pid) );

        # Skip over ones which aren't direct descendants of a resource manager
        next unless is_parent_resmgr_pbs($pid);

        my $vp;
        my %env = get_remote_env($pid);
        if ( !defined( $env{PBS_JOBID} ) || !defined( $env{PMI_RANK} ) ) {
            %env = get_remote_env_bygdb($pid);
        }

        if ( defined( $env{PBS_JOBID} ) && $env{PBS_JOBID} eq $job ) {
            $vp = $env{PMI_RANK};
        }
        if ( defined $vp ) {
            $vps{$vp} = $pid;
        }
    }
    foreach my $vp ( keys %vps ) {
        my $pid = $vps{$vp};
        register_target_process( $vp, $pid );
    }
    return;
}

#
# LSF-openmpi support and LSF-mpich2
#
sub lsfmpi_get_proc {
    my $job = shift;
    my @proc;
    my $rank_pid  = 1;
    my $rank_ppid = 2;
    my $rank_cmd  = 3;
    my $port;
    my ( $server, $mpirun_pid );
    $port       = $inner_conf{lsfmpi_port};
    $server     = $inner_conf{lsfmpi_server};
    $mpirun_pid = $inner_conf{lsfmpi_mpirpid};
    my $cmd      = "ps -o uid,pid,ppid,cmd -u $target_user";
    my @handle   = slurp_cmd($cmd);
    my $hostname = hostname();

    if ( $hostname eq $server ) {

        #this is the server
        #get all mpirun children, it should be TaskStarter pids
        #get all TaskStarter children it should be the appli pids
        my @ppid_proc;
        @ppid_proc =
          get_pids_ppid( $mpirun_pid, $rank_pid, $rank_ppid, @handle );
        foreach my $pid (@ppid_proc) {
            my @w_proc = get_pids_ppid( $pid, $rank_pid, $rank_ppid, @handle );
            push( @proc, @w_proc );
        }
    }
    if ( $#proc == -1 ) {    # nothing in @proc so try from port
            # all cases including case host=server which failed
            # get all TaskStarter that matched port num
            # get all TaskStarter children it should be the appli pids
        my @ppid_proc;
        @ppid_proc =
          get_pids_fromport( $port, $rank_pid, $rank_ppid, $rank_cmd, @handle );
        foreach my $pid (@ppid_proc) {
            my @w_proc = get_pids_ppid( $pid, $rank_pid, $rank_ppid, @handle );
            push( @proc, @w_proc );
        }
    }
    return @proc;
}

sub lsfmpi_find_pids {
    my $job = shift;
    my %vps;

    # Iterate over all processes for this user
    if ( $inner_conf{lsf_mode} eq 'mpich2' ) {
        foreach my $pid ( lsfmpi_get_proc($job) ) {

            my $vp;
            my %env = get_remote_env($pid);
            if ( !defined( $env{LSB_JOBID} ) || !defined( $env{PMI_RANK} ) ) {
                %env = get_remote_env_bygdb($pid);
            }

            if ( $env{LSB_JOBID} eq $job ) {
                $vp = $env{PMI_RANK};
            }
            if ( defined $vp ) {
                $vps{$vp} = $pid;
            }
        }
    } else {

        # lsf_mode eq 'openmpi'
        foreach my $pid ( lsfmpi_get_proc($job) ) {

            my $vp;
            my %env = get_remote_env($pid);
            if (   !defined( $env{OMPI_COMM_WORLD_SIZE} )
                || !defined( $env{OMPI_COMM_WORLD_RANK} ) )
            {
                %env = get_remote_env_bygdb($pid);
            }

            $vp = $env{OMPI_COMM_WORLD_RANK};
            if ( defined $vp ) {
                $vps{$vp} = $pid;
            }
        }
    }
    foreach my $vp ( keys %vps ) {
        my $pid = $vps{$vp};
        register_target_process( $vp, $pid );
    }
}

sub rms_find_pids {
    my $jobid = shift;

    my %vps;

    my @procs = load_rms_procs($jobid);

    foreach my $proc (@procs) {

        my $vp = $proc->{vp};

        # With any luck we have a new RMS and vp is extracted from /proc
        # Otherwise try and pick it out of the environment in a sane way if
        # that fails report errors for any process with isn't rmsloader

        # Strip or rmsloader and slurm[step]d;
        next if ( is_resmgr_process( $proc->{pid} ) );

        # If we aren't known to be the vp and we are not a direct
        # descendant of the resource manager then skip over to the next
        # process.
        next if ( not defined $vp and not is_parent_resmgr( $proc->{pid} ) );

        my $found = 'actual';

        if ( not defined $vp ) {
            $found = 'likely';

            my %env = get_remote_env( $proc->{pid} );

            if ( defined $env{RMS_PROCID} ) {
                $vp = $env{RMS_PROCID};
            } elsif ( defined $env{SLURM_PROCID} ) {
                $vp = $env{SLURM_PROCID};
            } else {
                next;
            }
        }

        push @{ $vps{$vp}{$found} }, $proc->{pid};
    }

    foreach my $vp ( keys %vps ) {
        if ( defined $vps{$vp}{actual} ) {
            foreach my $pid ( @{ $vps{$vp}{actual} } ) {
                register_target_process( $vp, $pid );
            }
        } else {
            foreach my $pid ( @{ $vps{$vp}{likely} } ) {
                register_target_process( $vp, $pid );
            }
        }
    }
    return;
}

sub inner_show_stats {
    my $jobid = shift;
    my $key   = ( $jobid << 9 ) - 1;
    run_command( undef,
        "$inner_conf{edb} --stats-raw --parallel --key=$key $inner_conf{edbopt}"
    );
    return;
}

# Receive a reply from a child.  If it's the last reply then combine with
# others and forward to parent.
sub reply_from_child {
    my ( $handle, $sd, $req ) = @_;

    # If it's the first connection over this socket simply foreward on the
    # signon command.
    if ( $req eq 'Welcome' ) {
        $sd->{socket}->printf("$handle->{signon_cmd}\n");
        return;
    }

    my $r = my_decode($req);

    # Merge this reply into the local one.
    $handle->{child_replys}++;

    # Combine the host responses.
    foreach my $status ( keys %{ $r->{host_responce} } ) {
        foreach my $host ( keys %{ $r->{host_responce}{$status} } ) {
            $handle->{all_replys}->{host_responce}{$status}{$host} =
              $r->{host_responce}{$status}{$host};
        }
    }

    # Combine the target process responses.
    if ( exists $r->{target_response} ) {
        foreach my $tp ( keys %{ $r->{target_response} } ) {
            $handle->{all_replys}->{target_response}{$tp} =
              $r->{target_response}{$tp};
        }
    }

    # Combine the target process responses from child.
    if ( exists $r->{target_output} ) {
        foreach my $tp ( keys %{ $r->{target_output} } ) {
            $handle->{all_replys}->{target_output}{$tp} =
              $r->{target_output}{$tp};
        }
    }

    # Combine the target process responses from child.
    if ( exists $r->{target_ns_output} ) {
        foreach my $tp ( keys %{ $r->{target_ns_output} } ) {
            $handle->{all_replys}->{target_ns_output}{$tp} =
              $r->{target_ns_output}{$tp};
        }
    }

    # Copy the target local responses.
    if ( exists $handle->{target_response} ) {
        foreach my $tp ( keys %{ $handle->{target_response} } ) {
            $handle->{all_replys}->{target_response}{$tp} =
              $handle->{target_response}{$tp};
        }
    }

    # Save any output we've got from this node.
    foreach my $key ( keys %inner_output ) {
        $handle->{all_replys}->{target_output}{$key} = $inner_output{$key};
    }

    %inner_output = ();

    # Save any output we've got from this node.
    foreach my $key ( keys %inner_ns_output ) {
        $handle->{all_replys}->{target_ns_output}{$key} =
          $inner_ns_output{$key};
    }
    %inner_ns_output = ();

    # Copy the network target errors into response.
    if ( exists $r->{target_data} ) {
        if ( exists $handle->{all_replys}->{target_data} ) {
            foreach my $key ( keys %{ $r->{target_data} } ) {
                foreach my $value ( keys %{ $r->{target_data}{$key} } ) {
                    if (
                        defined $handle->{all_replys}
                        ->{target_data}{$key}{$value} )
                    {
                        rng_merge(
                            $handle->{all_replys}->{target_data}{$key}{$value},
                            $r->{target_data}{$key}{$value}
                        );
                    } else {
                        $handle->{all_replys}->{target_data}{$key}{$value} =
                          $r->{target_data}{$key}{$value};
                    }
                }
            }
        } else {
            $handle->{all_replys}->{target_data} = $r->{target_data};
        }
    }

    # Merge in local target responses.
    foreach my $key ( keys %local_target_data ) {
        foreach my $value ( keys %{ $local_target_data{$key} } ) {
            if ( defined $handle->{all_replys}->{target_data}{$key}{$value} ) {
                rng_merge( $handle->{all_replys}->{target_data}{$key}{$value},
                    $local_target_data{$key}{$value} );
            } else {
                $handle->{all_replys}->{target_data}{$key}{$value} =
                  $local_target_data{$key}{$value};
            }
        }
    }

    %local_target_data = ();

    # If this isn't the last child to signon don't reply up-stream yet.
    if ( $handle->{child_replys} != $handle->{children} ) {
        return;
    }

    # Send the data upstream.
    my $reply = $handle->{all_replys};

    reply_to_parent( $handle, $reply );
    if ( $handle->{shutdown} ) {
        inner_cleanup_and_exit($handle);
    }

    # Reset local data.
    $handle->{all_replys}      = undef;
    $handle->{child_replys}    = 0;
    $handle->{target_response} = undef;
    return;
}

sub proc_link {
    my $pid = shift;
    my $path;
    if ($running_on_solaris) {
        $path = "/proc/$pid/path/a.out";
    } else {
        $path = "/proc/$pid/exe";
    }
    return readlink $path;
}

# Convert from a pid to a command name and do it in a safe manner to avoid
# warnings.  suid programs tend to have the exe link which is un-readable
# so if that yields nothing then load the name from the status file.
sub pid_to_name {
    my $pid = shift;
    my $exe = proc_link($pid);
    if ( defined $exe ) {
        return basename($exe);
    } else {
        return find_from_status( $pid, 'Name' );
    }
    return;
}

# Take the resource manager list of pids and possibly convert these into
# more interesting pids, in particular look for pids which appear to be
# scripts and, if they have any children, look at the children instead.
sub convert_pids_to_child_pids {
    my %process_data = get_extended_process_list( getpwuid $< );

    my %scripts;
    map { $scripts{$_}++ } split $COMMA, $inner_conf{scripts};

    my $ipids = $inner_conf{rmpids};

    foreach my $pid ( keys %process_data ) {

        # The resource manager pid this pid is associated with.
        my $rmpid;

        if ( defined $ipids->{$pid} ) {
            $rmpid = $pid;
        } else {
            my $ppid = $process_data{$pid};

            while ( defined $ppid and $ppid != 1 and $ppid != 0 ) {
                if ( defined $ipids->{$ppid} ) {
                    $rmpid = $ppid;
                    $ppid  = undef;
                } else {
                    $ppid = $process_data{$ppid};
                }
            }
        }

        next unless defined $rmpid;

        # Handle with the process going away whilst we look here, if we
        # don't have a name then it's gone and we should continue without
        # it.
        my $name = pid_to_name($pid);
        next unless defined $name;

        if ( defined $scripts{$name} ) {
            push @{ $ipids->{$rmpid}{scripts} }, $pid;
        } else {
            push @{ $ipids->{$rmpid}{notscripts} }, $pid;
        }
    }

    # Now chose what pid to target.
    my @apids;
    foreach my $key ( keys %{$ipids} ) {
        my $ip = $ipids->{$key};

        my $newpid;

        if ( defined $ip->{scripts} ) {
            my @ppids = sort { $a <=> $b } @{ $ip->{scripts} };
            $newpid = $ppids[0];
        }

        # If there are any pids which aren't scripts then target the first
        # one.
        if ( defined $ip->{notscripts} ) {
            my @ppids = sort { $a <=> $b } @{ $ip->{notscripts} };
            $newpid = $ppids[0];
        }

        # The process might have died and we simply didn't find anything,
        # if this is the case then just skip it, the outer will notice the
        # missing signon and report an appropriate error.
        next unless defined $newpid;

        my $status = hash_from_status($newpid);
        next unless defined $status;
        push @apids,
          {
            pid    => $newpid,
            vp     => $ip->{rank},
            status => $status,
          };

    }

    # Sort local pids by order of increasing rank.
    @{ $inner_conf{all_pids} } = sort { $a->{vp} <=> $b->{vp} } @apids;
    return;
}

# Find and report pids as part of the signon protocol, we should also
# report name
sub inner_find_pids {
    my ( $netdata, $cmd ) = @_;

    if ( defined $cmd->{pd} ) {
        my $hostname = $inner_conf{hostname};

  # Check for this host having any processes, if it doesn't appear to on first
  # inspection then try removing any domain name from the hostname and see if
  # we have a match then.  This allows for the case where hostnames are reported
  # the resource manager without domain names but hostnames are set with domain
  # names.
  # Note this is probably not 'safe' for strict definitions of hostnames and
  # domain names but for clusters it's highly unlikely to have two nodes with
  # matching hostnames but different domains.

        if ( not defined $cmd->{pd}{$hostname} ) {

            my ($new_name) = split $PERIOD, $hostname;

            if ( not defined $new_name ) {
                return;
            }
            $hostname = $new_name;
        }

        # We didn't match on the hostname and we haven't matched by removing
        # a domainname from our hostname so now try walking to list of specified
        # hosts to see if any of them shorten to the current hostname.
        if ( not defined $cmd->{pd}{$hostname} ) {
            foreach my $target_host ( keys %{ $cmd->{pd} } ) {
                my ($short_target) = split $PERIOD, $target_host;
                if ( $short_target eq $hostname ) {
                    $hostname = $target_host;
                }
            }

        }

        foreach my $rank ( keys %{ $cmd->{pd}{$hostname} } ) {
            register_target_process( $rank, $cmd->{pd}{$hostname}{$rank} );
        }
    } else {

        # Query the resource manager to find the pids, they'll be added to
        # the "all_pids" array.
        $rmgr{ $inner_conf{rmgr} }{find_pids}( $inner_conf{jobid} );
    }

    convert_pids_to_child_pids();

    foreach my $proc ( @{ $inner_conf{all_pids} } ) {
        my $pid = $proc->{pid};
        my $vp  = $proc->{vp};
        target_key_pair( $vp, 'FOUND', 'yes' );
        target_key_pair( $vp, 'NAME',  find_from_status( $pid, 'Name' ) );
        target_key_pair( $vp, 'STATE', find_from_status( $pid, 'State' ) );
    }
    return;
}

# Receive a command (perl reference) from our parent.
#
# When we receive a command:
# 1) Send it on to our children.
# 2) Execute it.
# 3) If we have no children send reply.
sub command_from_parent {
    my ( $netdata, $cmd ) = @_;

    $netdata->{host_responce} = 'ok';

    if ( $cmd->{mode} eq 'signon' ) {
        $netdata->{signon_cmd} = my_encode($cmd);

        # Setup the environment.
        foreach my $key ( keys %{ $cmd->{cinner} } ) {
            $inner_conf{$key} = $cmd->{cinner}{$key};
        }

        if (
            not
            exists $cmd->{connection_tree}{ $inner_conf{hostname} }{children} )
        {
            $netdata->{children} = 0;
            inner_find_pids( $netdata, $cmd );
            return;
        }

        my @children =
          @{ $cmd->{connection_tree}{ $inner_conf{hostname} }{children} };
        $netdata->{children} = @children;

        # Only one child is tested so far.
        foreach my $chostname (@children) {
            my $socket = connect_to_child(
                $chostname,
                $cmd->{remote}{$chostname}{port},
                $cmd->{remote}{$chostname}{key}
            );
            my %cdata;
            $cdata{socket}   = $socket;
            $cdata{hostname} = $chostname;
            $cdata{line_cb}  = \&reply_from_child;
            $cdata{state}    = 'init';
            $netdata->{sel}->add($socket);
            $netdata->{connections}{$socket} = \%cdata;
            push @{ $netdata->{child_sockets} }, $socket;
        }
        inner_find_pids( $netdata, $cmd );
        return;
    }

    # Forward on to our children before doing any more processing.
    if ( $netdata->{children} ) {
        my $req = my_encode($cmd) . "\n";
        foreach my $child ( @{ $netdata->{child_sockets} } ) {
            $child->printf($req);
            $child->flush();
        }
    }

    if ( $cmd->{mode} eq 'exit' ) {
        global_detach( $inner_conf{all_pids} );
        $netdata->{shutdown} = 1;
        return;
    }

    $inner_conf{mode} = $cmd->{mode};

    my $pid_list;

    # If supplied with a rank list then use it now to generate a list of
    # processes to inspect.
    if ( exists $cmd->{ranks} ) {
        my $rng = rng_dup( $cmd->{ranks} );

        # Loop over ranks first as there are potentially more of them.
        while ( defined( my $rank = rng_shift($rng) ) ) {
            foreach my $proc ( @{ $inner_conf{all_pids} } ) {
                my $vp = $proc->{vp};
                if ( $vp == $rank ) {
                    push @{$pid_list}, $proc;
                }
            }
        }
    } else {
        $pid_list = $inner_conf{all_pids};
    }

    # Record that this processes is still here so the outer process can
    # exit if there is nothing to target.
    foreach my $proc ( @{$pid_list} ) {
        if ( -d "/proc/$proc->{pid}" ) {
            target_key_pair( $proc->{vp}, 'FOUND', 'yes' );
        }
    }

    # Now do the work by calling handler or handler_all.  Catch any
    # exception errors here and extract the top line of the error to report
    # to the user.  If calling handler then just report an error for that
    # rank and move on, if using handler_all then report errors for all
    # ranks on this node.

    # This has the advantage that even if there is an error with data
    # collection on this node the rest of the application can carry and
    # hopefully still give the user meaningful information or at least
    # meaningful error messages.

    # Even if a exception is generated rank output may still exist for that
    # or any other rank on this node, we'll have to see if that causes
    # problems or if it's best to clear the target_key_pair() and output()
    # data for this node/rank.

    # Bit of a hack here until I can fix it properly, pass on the
    # output format so that the stack trace code knows when to do
    # clever things in tree mode.
    my $cargs = $cmd->{cargs};
    if ( defined $cmd->{out_format} ) {
        $cargs->{out_format} = $cmd->{out_format};
    } else {
        $cargs->{out_format} = 'raw';
    }

    # Ensure that we are attached to the target processes if required
    # and that we are not if not required.
    global_attach( $cmd->{mode}, $pid_list );

    if ( defined $allfns{ $cmd->{mode} }{handler_all} ) {
        eval {

            $netdata->{target_response} =
              $allfns{ $cmd->{mode} }{handler_all}( $cargs, $pid_list );
            1;
        } or do {
            my $error = $@;
            my @e = split qr{\n}x, $error;
            $netdata->{host_responce} = 'error';
            foreach my $proc ( @{$pid_list} ) {
                target_error( $proc->{vp}, "Critical error: ($e[0])" );
            }
          }
    } else {

        my %gres;
        foreach my $proc ( @{$pid_list} ) {
            my $vp  = $proc->{vp};
            my $pid = $proc->{pid};
            eval {

                # The only difference here is the type of the first option,
                # all functions should be converted to a single format here
                if ( defined $allfns{ $cmd->{mode} }{handler_one} ) {
                    my $res =
                      $allfns{ $cmd->{mode} }{handler_one}( $cargs, $proc );
                    $gres{$vp} = $res if ( defined $res );
                } else {
                    my $res =
                      $allfns{ $cmd->{mode} }{handler}( $cmd->{cargs}, $vp,
                        $pid );
                    $gres{$vp} = $res if ( defined $res );
                }
                1;
            } or do {
                my $error = $@;
                my @e = split qr{\n}x, $error;
                $netdata->{host_responce} = 'error';
                target_error( $vp, "Critical error: ($e[0])" );
              }
        }

        if (%gres) {
            $netdata->{target_response} = \%gres;
        }
    }

    # Detach from all processes if the outer requested us to.
    if ( defined $cmd->{detach_after_callback} ) {
        global_detach($pid_list);
    }

    return;
}

# Time for the inner process to exit, cleanup all sockets and quit.
sub inner_cleanup_and_exit {
    my $netdata = shift;
    foreach my $h ( $netdata->{sel}->handles() ) {
        $h->flush();
        $h->close();
    }
    exit 0;
}

# Send a reply to our parent, put a status of "ok" on for this host.
sub reply_to_parent {
    my ( $netdata, $cmd ) = @_;

    $cmd->{host_responce}{ $netdata->{host_responce} }
      { $inner_conf{hostname} } = 1;

    my $reply = my_encode($cmd);
    $netdata->{parent}->{socket}->print("$reply\n");
    return;
}

# Process a single line of input onto a socket we are listening on.  This
# is probably our parent (who may be the outer process) but it needs to be
# authenticated.
sub command_from_outer {
    my ( $netdata, $cdata, $line ) = @_;

    my $s = $cdata->{socket};
    if ( not $cdata->{trusted} ) {
        if ( $line eq "hello $netdata->{key}" ) {

            $cdata->{trusted} = 1;
            $cdata->{str}     = $EMPTY_STRING;
            $s->printf("Welcome\n");
            $netdata->{parent} = $cdata;
        } elsif ( $line eq 'debug' ) {
            my $r = Dumper($netdata);
            $s->printf($r);
            $s->flush();
            $netdata->{sel}->remove($s);
            $s->close();
            $cdata->{dead} = 1;
            print "debug\n";
        } else {
            print "Closing connection from $cdata->{desc} (Bad signon)\n";
            $netdata->{sel}->remove($s);
            $s->close();
            $cdata->{dead} = 1;
        }
        return;
    }

    command_from_parent( $netdata, my_decode($line) );

    if ( $netdata->{children} == 0 ) {
        my $res;
        if ( defined $netdata->{target_response} ) {
            $res->{target_response} = $netdata->{target_response};
        }

        # Save any output we've got from this node.
        foreach my $key ( keys %inner_output ) {
            $res->{target_output}{$key} = $inner_output{$key};
        }

        # Save any output we've got from this node.
        foreach my $key ( keys %inner_ns_output ) {
            $res->{target_ns_output}{$key} = $inner_ns_output{$key};
        }

        if (%local_target_data) {
            $res->{target_data} = \%local_target_data;
        }

        reply_to_parent( $netdata, $res );

        # Clear down the local inputs.
        %inner_output               = ();
        %inner_ns_output            = ();
        %local_target_data          = ();
        $netdata->{target_response} = undef;

        if ( $netdata->{shutdown} ) {
            inner_cleanup_and_exit($netdata);
        }
    }
    return;
}

# Loop forever in the inner process.
sub inner_loop_for_comms {
    my ($outerloc) = @_;

    my $server = create_local_port( $inner_conf{port_range} );

    my $lport    = $server->sockport();
    my $hostname = $inner_conf{hostname};
    my $key      = rand;

    my $outer_timeout = ( $inner_conf{interval} * 2 ) + 10;

    if ( defined $outerloc ) {
        my ( $ohost, $oport ) = split $COLON, $outerloc;
        my $os = IO::Socket::INET->new(
            PeerAddr => $ohost,
            PeerPort => $oport,
            Proto    => 'tcp',
        ) or confess('Failed to connect to outer');
        my $sec = find_padb_secret();
        croak 'No secret' if not defined $sec;
        $os->print("Hello $sec $hostname $lport $key\n");
        $os->close();
    } else {

        # For now just print the signon code to stdout and let the outer pick
        # it up.
        my $signon_text = "connect $hostname $lport $key\n";
        print $signon_text;

        # Add an explicit flush here to ensure the signon is printed,
        # stdout doesn't automatically get forwarded to through the
        # resource manager without this here.
        flush { *STDOUT };
    }

    my $netdata;
    $netdata->{sel} = IO::Select->new();
    $netdata->{sel}->add($server);
    $netdata->{server}   = $server;
    $netdata->{key}      = $key;
    $netdata->{shutdown} = 0;

    my $sel = $netdata->{sel};

    my $stime = time;

    # "Last seen time" of another process.  This is the time we last had any
    # communication from the outer, if it becomes too far in the past then
    # we should probably exit.
    my $ltime = $stime;

    while ( $sel->count() > 0 ) {
        while ( my @data = $sel->can_read(5) ) {
            $ltime = time;
            foreach my $s (@data) {
                if ( $s == $server ) {
                    my $new = $server->accept() or confess('Failed accept');
                    $sel->add($new);
                    my $peer = getpeername $new;
                    my ( $port, $addr ) = unpack_sockaddr_in($peer);
                    my $ip = inet_ntoa($addr);
                    my $remhost = gethostbyaddr $addr, AF_INET;

                    my %sinfo;
                    $sinfo{hostname}              = $remhost;
                    $sinfo{trusted}               = 0;
                    $sinfo{port}                  = $port;
                    $sinfo{desc}                  = "$remhost:$port";
                    $sinfo{socket}                = $new;
                    $sinfo{line_cb}               = \&command_from_outer;
                    $netdata->{connections}{$new} = \%sinfo;
                    next;
                }

                my $sinfo = $netdata->{connections}{$s};
                my $d;
                my $count = sysread $s, $d, 65536;

                # Dead connection.
                if ( not defined $d or $count == 0 ) {

                    if ( eof $s ) {
                        $sel->remove($s);
                        $s->close();
                        $sinfo->{trusted} = 0;
                        $sinfo->{dead}    = 1;
                        my $scount = $sel->count();
                    }
                    next;
                }

                $sinfo->{str} .= $d;
                extract_line( $netdata, $sinfo );

            }
        }
        my $time = time;

        # Should probably handle this better, if the outer or tree never
        # signons for whatever reason silently die as it's probably the best
        # thing do to.
        if ( ( $sel->count() == 1 ) and ( ( $time - $stime ) > 30 ) ) {
            exit 0;
        }

        # If we are (were) connected but haven't heard anything for a while then
        # the outer process has likely died so we should also exit cleanly.
        # There doesn't seem to be another way to detect this so just abort
        # if we haven't heard anything for a while.  This value needs to be
        # greater than the maximum reasonable value for 'interval' in the
        # outer process.
        if ( ( $time - $ltime ) > $outer_timeout ) {
            exit 0;
        }
    }
    my $count = $sel->count();
    print "Thats not supposed to happen count=($count)\n";
    return;
}

sub inner_main {

    $inner_conf{hostname} = hostname();

    # Load the inner config options, the defaults are the same as the outer
    # config options so just load them as they are set normally.  Use the
    # @inner_conf and @inner_conf_cmd lists to decide which ones to copy.
    # If any of these options are set then the outer process will forward
    # on any changes as part of the setup procedure.
    foreach my $conf (@inner_conf) {
        $inner_conf{$conf} = $conf{$conf};
    }

    foreach my $conf (@inner_conf_cmd) {
        $inner_conf{$conf} = $conf{$conf};
    }

    # Over-ride the defaults for these two as minfo might not exist on the
    # front end.
    $inner_conf{edb} = find_edb();

    # Load the command line options.
    my %optionhash;

    map { $optionhash{"$_=s"} = \$inner_conf{$_} } @inner_conf_cmd;

    GetOptions(%optionhash) or confess("could not parse options\n");

    $inner_conf{myld} = $ENV{LD_LIBRARY_PATH};

    inner_loop_for_comms( $inner_conf{outer} );
    exit 0;

}

###############################################################################
#
# Main.
#
###############################################################################

# Initialise (some of) the options which are common to both the inner and
# outer instances of padb.  Attempt to make it easy to add new options by
# keeping everything in one place.
#
# Additional work is needed to make this 100% consistent, some of these
# options have secondary options (e.g. --kill and --signal) and this isn't
# dealt with yet.
#
# stack_long has a special case later on which adds two extra handlers in
# the inner code, this could be replaced by prehandler and posthandler but
# it's the only code that needs it so far.

sub to_arg {
    my $arg = shift;
    my $res = "$arg->{arg_long}";
    $res =~ s{_}{-}gx;
    if ( defined $arg->{arg_short} ) {
        $res .= "|$arg->{arg_short}";
    }
    if ( defined $arg->{type} ) {
        $res .= $arg->{type};
    }
    return $res;
}

sub common_main {

    # The quasi-authoritative list of modes padb can operate in.

    # Notes on the callback functions and parameters.

    # handler     Called in the inner for each target process.
    # param:      ??, $vp, $pid

    # handler_all Called once in the the inner and should iterate over each
    # target process.
    #
    #             ??, $vp, $pid

    # These two functions can either return a value, and have it passed to
    # the output handler or call output() and use the
    # default_output_handler().

    # out_handler Called once in the outer to display the output
    # pre_out_handler Called once in the outer to display any header.

    # TODO:
    # --mode=<mode> on the command line?
    # Sort out secondary and options_i so they are handled in the same way.

    $allfns{queue} = {
        out_handler => \&mpi_queue_output_handler,
        arg_long    => 'message-queue',
        qsnet       => 1,
        arg_short   => 'q',
        handler     => \&qsnet_show_tport_queue,
        help        => 'Show the message queues',
        options_i   => {
            minfo   => undef,
            mpi_dll => undef,
        }
    };

    $allfns{kill} = {
        handler   => \&kill_proc,
        arg_long  => 'kill',
        help      => 'Deliver signal to processes',
        secondary => [
            {
                arg_long => 'signal',
                type     => '=s',
                default  => 'TERM',
                verify   => \&check_signal,
            }
        ]
    };

    $allfns{mqueue} = {
        handler_one => \&show_mpi_queue_one,
        needs_gdb   => 1,
        arg_long    => 'mpi-queue',
        arg_short   => 'Q',
        help        => 'Show MPI message queues',
        options_i   => {
            minfo   => undef,
            mpi_dll => undef,
        }
    };

    $allfns{deadlock} = {
        handler_one => \&show_mpi_queue_one,
        needs_gdb   => 1,
        arg_long    => 'deadlock',
        arg_short   => 'j',
        help        => 'Run deadlock detection algorithm',
        out_handler => \&mpi_deadlock_detect,
        options_i   => {
            mpi_dll => undef,
            minfo   => undef,
        },
        options_bool => {
            show_group_members => 'no',
            show_all_groups    => 'no',
        },
    };

    $allfns{pinfo} = {
        handler_all  => \&show_proc_all,
        arg_long     => 'proc-info',
        help         => 'Show process information',
        options_bool => {
            proc_shows_proc => 'yes',
            proc_shows_fds  => 'no',
            proc_shows_maps => 'no',
            proc_shows_stat => 'no',
        }
    };

    $allfns{threads} = {
        handler_one => \&thread_list_from_pid,
        needs_gdb   => 1,
        arg_long    => 'list-threads',
        help        => 'List threads in target processes',
    };

    $allfns{proc_summary} = {
        handler_all => \&show_proc_all,
        out_handler => \&show_proc_format,
        arg_long    => 'proc-summary',
        help        => 'Show process information in top format',
        options_i   => {
            column_seperator => '  ',
            proc_sort_key    => 'rank',
            nprocs_output    => undef,
        },
        options_bool => {
            proc_shows_proc    => 'yes',
            proc_shows_stat    => 'yes',
            proc_shows_fds     => 'no',
            proc_shows_maps    => 'no',
            proc_show_header   => 'yes',
            reverse_sort_order => 'no',
        },
        secondary => [
            {
                arg_long => 'proc_format',
                type     => '=s',
                default =>
'rank,hostname,pid,vmsize,vmrss,stat.state=S,load1=uptime,pcpu=%cpu,stat.processor=lcore,name=command'
            },
        ]
    };

    $allfns{stack} = {
        handler_one => \&stack_trace_from_pid,
        needs_gdb   => 1,
        arg_long    => 'stack-trace',
        arg_short   => 'x',
        help        => 'Show stack trace (see also -t)',
        options_i   => {
            gdb_retry_count     => 3,
            max_distinct_values => 3,
            stack_strip_above =>
'elan_waitWord,elan_pollWord,elan_deviceCheck,opal_condition_wait,opal_progress',
            stack_strip_below => 'main,__libc_start_main,start_thread',
            thread_id         => undef,
        },
        options_bool => {
            stack_shows_params => 'no',
            stack_shows_locals => 'no',
        },
        secondary => [
            {
                arg_long => 'strip_below_main',
                type     => '!',
                default  => 1,
            },
            {
                arg_long => 'strip_above_wait',
                type     => '!',
                default  => 1,
            },
          ]

    };

    $allfns{stack_long} = {
        handler_all => \&show_full_stacks,
        arg_long    => 'stack-trace-full',
        arg_short   => 'X',
        help        => 'Show long stack trace (with locals)',
    };

    $allfns{mpi_watch} = {
        handler_all     => \&mpi_watch_all,
        arg_long        => 'mpi-watch',
        help            => 'Trace MPI programs',
        pre_out_handler => \&pre_mpi_watch,
        out_handler     => \&show_mpi_watch,
        options_i       => {
            minfo          => undef,
            mpi_dll        => undef,
            mpi_watch_file => undef,
        }
    };

    $allfns{lstopo} = {
        handler      => \&lstopo,
        arg_long     => 'lstopo',
        help         => 'Show CPU topology using lstopo',
        options_i    => { lstopo_command => 'lstopo --pid %p -.txt', },
        options_bool => { lstopo_show_warning => 'no', },
    };

    $allfns{command} = {
        handler   => \&run_cmd_against_target,
        arg_long  => 'command',
        help      => 'Run command on target node',
        options_i => { command => 'readlink /proc/%p/exe', }
    };

    $allfns{ping} = {
        handler  => \&ping_rank,
        arg_long => 'ping',
        help     => 'Internal ping',
    };

    $allfns{set_debug} = {
        handler   => \&set_debug,
        qsnet     => 1,
        arg_long  => 'set-debug',
        arg_short => 'D',
        help      => 'Set debug flags (use --dflag=value)',
        secondary => [
            {
                arg_long => 'dflag',
                type     => '=s',
                default  => '0'
            }
        ]
    };

    # These next two don't work currently pending access to a QsNet system
    # for testing.  In the new full-duplex world startup is a little
    # different and these functions need updating.  In particular the
    # following need to be addressed.  the callback parameters are probably
    # wrong.  The shared memory key needs to be calculated.  Config options
    # need to be read locally rather than globally
    $allfns{qsnet_stats} = {
        handler_all => \&inner_show_stats,
        out_handler => \&estats_show_stats,
        qsnet       => 1,
        arg_long    => 'statistics-total',
        arg_short   => 's',
        help        => 'Show the job-wide statistics.',
        options_i   => {
            stats_name     => undef,
            stats_sort_key => 'vp',

        },
        options_bool => {
            stats_reverse  => 'no',
            stats_short    => 'no',
            show_all_stats => 'no',
          }

    };
    $allfns{qsnet_groups} = {
        handler_all  => \&inner_show_stats,
        out_handler  => \&group_status,
        qsnet        => 1,
        arg_long     => 'group',
        arg_short    => 'g',
        help         => 'Show the state of collective operations (groups).',
        options_bool => {
            show_group_members => 'no',
            show_all_groups    => 'no',
        }
    };

    return;
}

# Now run some actual code.

common_main();

if ( @ARGV > 0 and $ARGV[0] eq '--inner' ) {
    shift @ARGV;
    inner_main();
} else {
    outer_main();
}

exit 0;
