#!/usr/bin/python3

# Copyright (c) 2019 SUSE LLC  All rights reserved.
#
# check_proc_timed_hang is free software: you can redistribute it
# and/or modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation, version 2 of
# the License.
#
# check_proc_timed_hang is distributed in the hope that it will
# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with check_proc_timed_hang. If not, see
# <http://www.gnu.org/licenses/>.
#
"""
usage: check_proc_timed_hang -h | --help
       check_proc_timed_hang -p <process> [-k <kill_signal>] [-t <run_time_allowance>]

options:
   -h --help
       Show help
   -p --process-name <process>
       The name of the process to be monitored
   -k --kill-signal <kill_signal>
       If provided the monitored process will be killed with the
       provided signal
   -t --run-time-allowance <run_time_allowance>
       The runtime allotted for the process. Specify a numeric value followed
       by s (for seconds), m (for minutes), h (for hours), or d (for days).
       A value of 10m would expect the process to run for 10m. If the process
       is still running after 10m an attempt will be made to kill it with
       the given signal if provided. If either the kill attempt fails or
       no kill instruction is provided the process will be marked as critical.
       The default allotted runtime is 1 hour. [default: 1h]
"""

import datetime
import dateutil.relativedelta
import os
import sys
import time
import psutil

from docopt import docopt

# Nagios plugin exit states
OK, WARNING, CRITICAL, UNKNOWN = range(4)

# Parse the command line against the usage text in the module docstring
command_args = docopt(__doc__)

# Individual option values used by the rest of the script
process_name = command_args.get('--process-name')
kill_signal = command_args.get('--kill-signal')
allowed_runtime = command_args.get('--run-time-allowance')


def get_process_by_name(process_name):
    """
    Find the single running process whose command line contains a name.

    Every process reported by psutil is inspected; a process matches when
    process_name occurs as a substring of its space-joined command line.
    The process running this check is skipped by pid.

    If more than one process matches, the matches are printed and the
    script exits with the CRITICAL state.

    :param str process_name: text to look for in each command line

    :return: dict with the keys pid, ppid, name, cmdline and create_time
        for the matching process, or None when nothing matches

    :rtype: dict
    """
    own_pid = os.getpid()
    processes = []
    for process in psutil.process_iter():
        try:
            pinfo = process.as_dict(
                attrs=['pid', 'ppid', 'name', 'cmdline', 'create_time']
            )
        except psutil.NoSuchProcess:
            # The process went away between listing and inspection
            continue

        # as_dict() fills in None for attributes it could not read
        # (access denied, zombie). Normalize to an empty list so the
        # joins below cannot fail on such a process.
        pinfo['cmdline'] = pinfo['cmdline'] or []

        if pinfo['pid'] == own_pid:
            continue
        if process_name in " ".join(pinfo['cmdline']):
            processes.append(pinfo)

    if len(processes) > 1:
        print(
            "Multiple processes found: " + ", ".join(
                '"%s" (pid=%s)' % (" ".join(p['cmdline']), p['pid'])
                for p in processes
            )
        )
        sys.exit(CRITICAL)

    return processes[0] if processes else None


# Look up the monitored process; a process that is not running cannot hang
process_info = get_process_by_name(process_name)
if not process_info:
    print('Process %s is not running' % process_name)
    sys.exit(OK)

create_time = datetime.datetime.fromtimestamp(process_info['create_time'])

# Translate the allowance, e.g. "10m", into a relativedelta keyword and an
# amount. Anything that is not <integer><s|m|h|d> is reported as UNKNOWN
# instead of being silently interpreted as days or ending in a traceback.
runtime_units = {'s': 'seconds', 'm': 'minutes', 'h': 'hours', 'd': 'days'}
delta = runtime_units.get(allowed_runtime[-1:])
try:
    runtime_amount = int(allowed_runtime[:-1])
except ValueError:
    runtime_amount = None

if delta is None or runtime_amount is None:
    msg = 'Invalid run time allowance "%s", expected a number followed by s, m, h or d'
    print(msg % allowed_runtime)
    sys.exit(UNKNOWN)

expire_time = create_time + dateutil.relativedelta.relativedelta(
    **{delta: runtime_amount}
)

if expire_time < datetime.datetime.now():
    # We have exceeded the specified runtime allowance
    if kill_signal:
        # The signal ends up in a shell command line; only accept plain
        # signal numbers or names (e.g. 9, KILL, SIGTERM, RTMIN+1) so that
        # shell metacharacters can never be passed through.
        if not all(char.isalnum() or char in '+-' for char in kill_signal):
            print('Invalid kill signal "%s"' % kill_signal)
            sys.exit(UNKNOWN)

        os.system('kill -%s %s' % (kill_signal, process_info['pid']))
        # Give the process a moment to terminate before checking on it
        time.sleep(1)
        if psutil.pid_exists(process_info['pid']):
            msg = 'Process "%s" still running after kill attempt'
            print(msg % process_name)
            sys.exit(CRITICAL)
        else:
            msg = 'Process "%s" successfully removed'
            print(msg % process_name)
            sys.exit(OK)

    # NOTE(review): the usage text states that an expired process without a
    # kill instruction is marked as critical, yet WARNING is returned here.
    # Confirm which state is intended before changing either side.
    msg = 'Process "%s" exceeded expiration time "%s"'
    print(msg % (process_name, expire_time))
    sys.exit(WARNING)
else:
    msg = 'Process "%s" within runtime allotment'
    print(msg % process_name)
    sys.exit(OK)
