#!/bin/sh

# Name: pestat
# ------------
# Torque resource manager utility script: Print a 1-line summary of jobs on each node.
# Usage: Run "pestat -h" for help information.
#
# Colors may be used in the output (also controlled by the PESTAT_COLOR environment variable).
# The printout at the end may be customized if needed.
#
# Netload information:
# --------------------
# The Torque pbs_mom records network load information as the sum of transmit+receive
# of all interfaces.
# The "netload" information is defined in the source file ./src/resmom/linux/mom_mach.c
# as the sum of bytes on all network interfaces since boot time, read from /proc/net/dev.
# The pestat command (from version 2.9) prints delta-netload information when run twice
# with some time interval in between. The file $NETLOADFILE stores recorded information.
#
# The baseline netload information may be generated from cron, say, every 10 minutes
# by this crontab entry:
# */10 * * * * /usr/local/bin/pestat -C > /dev/null
# Otherwise the default netload file is the user-specific file netload.$USER.
# Subsequent pestat commands will use the baseline netload.

# Author: zhangjinyang@biols.ac.cn
# Version: 2.15
# Update Notes:
#  - Support selecting a specific queue; if no node list is given, show all nodes
#  - Support getting the job list from "jobs" (jobs_from_status=0), NOT from "status->jobs"
#  - Repair numtask[] error for Torque v4 (the task list is not split on spaces)
#  - Repair misaligned columns of differing widths by formatting them
# Date: 2020-08-17 20:27:04

# Author: Ole.H.Nielsen@fysik.dtu.dk
# URL: ftp://ftp.fysik.dtu.dk/pub/Torque/pestat

VERSION="pestat version 2.15.  Date: 17 Aug 2020"

# Locations of command and directories
# Replace with path to pbsnodes and qstat commands if they are not included in $PATH
PBSNODES=pbsnodes
QSTAT=qstat
AWK=/bin/awk

# Node names have different lengths at different sites,
# so configure this printf string to accommodate your longest node name + 1 (>= 5 chars).
# The first %s receives the color escape sequence, the second the node name.
NODENAMEFORMAT="%s%-10s"

# The pestat status directory (must be secure location for root)
PESTAT_LIBDIR=/var/lib/pestat
NETLOAD_CRON=$PESTAT_LIBDIR/netload.cron
# Only root creates the status directory; other users fall back to a per-user netload file.
# Two separate tests joined by && replace the obsolescent and ambiguous "test ... -a ...".
if [ ! -d "$PESTAT_LIBDIR" ] && [ "$USER" = "root" ]
then
    mkdir -v "$PESTAT_LIBDIR"
fi

# Minimum age of NETLOADFILE (seconds): If less than this value the netload information may not be reliable:
NETLOADFILE_MINAGE=1

# Command usage:
# usage: print the command-line help text on stdout.
# Reads $0 (script name) and $NETLOAD_CRON (shown in the -C description).
# Declared with the POSIX "name()" form because the script runs under /bin/sh,
# where the "function" keyword is not guaranteed to exist.
usage()
{
# Unquoted here-document: $0 and $NETLOAD_CRON are expanded, "*" is taken literally.
cat <<EOF
Usage: $0 [-f] [-a] [-c|-n] [-d] [-v] [-q queue] [-s state] [-u username|-g groupname] [-j jobs] [-C] [-h]
where:
    -a: Listing all jobs on node (default: only show user jobs)
    -q: Listing only nodes in specific queue (default: show all nodes)
    -s: Listing only nodes in specific state (default: show all nodes)
    -f: Listing only nodes that are flagged by *
    -d: Listing also nodes that are down
    -c/-n: Color/no color output
    -u username: Print only user <username> (do not use with the -g flag)
    -g groupname: Print only users in group <groupname>
    -j jobs: List only nodes with at least <jobs> running jobs
    -C: Use with cron: Netload file will be saved as $NETLOAD_CRON
    -h: Print this help information
    -v: Version information
EOF
}

# Nodes (colon-separated node names) and queues (colon-separated node "properties" values)
# that are never listed in the printout.
NODEBLACKLIST="node78:node161:node162:node163:node164:node165:node166:node167:node168:node169:node170"
QUEUEBLACKLIST="high"

#
# Netload information from Torque pbs_mom will be printed
#
netloadprint=1
# Temporary file for Netload information.
# The variable is quoted so that an empty value is tested as an empty file name
# instead of collapsing to the one-argument form "test -s", which is always true.
if [ -s "$NETLOAD_CRON" ]
then
    # If NETLOADFILE has been generated by a cron-job as $NETLOAD_CRON we use this file
    NETLOADFILE=$NETLOAD_CRON
    NETLOADWRITE=0
else
    # Default: Per-user netload file.
    # Fall back to "id -un" when USER is not set in the environment.
    NETLOADFILE=/var/tmp/netload.${USER:-$(id -un)}
    NETLOADWRITE=1
fi

# Scaling of the network load:
# Default value:
# NETLOADSCALE=1
# If you use Linux port bonding, each network byte is counted twice by Torque: bond0 plus ethX devices
# therefore you need to scale down the netload by a factor of 2:
NETLOADSCALE=2

# Netload threshold above which we flag this node:
# Netload > 2000 Mbit/s is flagged (we have dual-Gigabit Ethernet)
# Gigabit Ethernet full-duplex is 2*1000=2000 Mbit/s
NETLOADTHRES=2000

#
# Default parameter values
#
# Omit down nodes from the flagged list because we do not wish to see them
# (Use "pbsnodes -l" to list down nodes).
listdownnodes=0

# List only nodes with >= minjobs running jobs (default: minjobs=0)
# This is useful for selecting those nodes that run multiple jobs.
minjobs=0

# Colored output by default
colors=1

# Check user environment variable PESTAT_COLOR for color
if [ "$PESTAT_COLOR" = "0" ]
then
    colors=0
fi

# Check if output is NOT a terminal: Turn colors off (can be overruled by "-c" flag).
FD=1    # File Descriptor no. 1 = stdout
if [ ! -t "$FD" ]
then
    colors=0
fi

#
# Process command arguments
#
listflagged=0
listalljobs=0

# parse_args: process the command line options and validate them.
# Arguments: the script arguments ("$@").
# Sets the globals listalljobs, queue, tstate, listflagged, listdownnodes, colors,
# username, groupname, minjobs, NETLOADFILE and NETLOADWRITE according to the flags.
# Prints the selection messages on stdout.
# Exits 0 after -v; exits 1 after -h, an unknown option, a non-numeric -j value,
# extraneous arguments, or when both -u and -g are given.
parse_args()
{
    OPTIND=1
    while getopts "aq:s:fdcnvu:g:j:Ch" options; do
        case $options in
            a )     listalljobs=1
                # List all jobs on nodes
                ;;
            q )     queue=$OPTARG
                echo "Listing only nodes in queue $queue"
                # List only nodes in queue
                ;;
            s )     tstate=$OPTARG
                # The selected state is stored in tstate
                echo "Listing only nodes in state $tstate"
                ;;
            f )     listflagged=1
                echo "Listing only nodes that are flagged by *"
                ;;
            d )     listdownnodes=1
                # Listing also down nodes
                ;;
            c )     colors=1
                # Force the use of colors in output
                ;;
            n )     colors=0
                # Do not use colors in output
                ;;
            u )     username=$OPTARG
                echo "Select only user $username"
                ;;
            g )     groupname=$OPTARG
                echo "Select only users in group $groupname"
                ;;
            j )     minjobs=$OPTARG
                # minjobs is compared numerically later on: reject anything but digits
                case $minjobs in
                    ''|*[!0-9]*)
                        echo "ERROR: -j requires a non-negative integer: $minjobs"
                        usage
                        exit 1;;
                esac
                echo "List only nodes with at least $minjobs running jobs"
                ;;
            C ) NETLOADFILE=$NETLOAD_CRON
                NETLOADWRITE=1
                ;;
            v ) echo "$VERSION"
                exit 0;;
            # getopts reports an invalid option as a literal "?"; the pattern is escaped
            # so that it does not act as a match-any-character glob.
            h | \? ) usage
                exit 1;;
            * ) usage
                exit 1;;
        esac
    done

    # Test for extraneous command line arguments
    if [ "$#" -gt "$((OPTIND - 1))" ]
    then
        # Quoted so that the arguments are not subject to pathname expansion
        echo "ERROR: Too many command line arguments: $*"
        usage
        exit 1
    fi

    if [ -n "$username" ] && [ -n "$groupname" ]
    then
        echo "ERROR: Do not select both username and groupname"
        usage
        exit 1
    fi
}

#
# Process command arguments
#
parse_args "$@"

# if test -s $PBSNODES
# then
#     :
# else
#     echo ERROR: Can not find pbsnodes
#     usage
#     exit 1
# fi

# Determine the age of the recorded netload file.
# netloadage is later used as the time interval for the delta-netload calculation.
# If the file is missing or empty, or its modification time cannot be read by stat,
# the netload printout is disabled (netloadprint=0) instead of continuing with an empty age.
if [ -s "$NETLOADFILE" ] && filetime=$(stat -c "%Y" "$NETLOADFILE")
then
    # NETLOADFILE file age in seconds:
    now=$(date "+%s")
    netloadage=$(($now - $filetime))
    if [ "$netloadage" -lt "$NETLOADFILE_MINAGE" ]
    then
        # The recorded data is too recent to yield a reliable rate
        echo "Interval between operations should be at least $NETLOADFILE_MINAGE second, please try again..."
        exit 0
    fi
else
    # No usable netload file: the netload cannot be printed until next time.
    netloadprint=0
fi

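#
# Heading for printout showing:
#
# Node:     Node hostname
# state:    Torque state
# load%:    CPU load average
# resi:     Resident (used) memory, i.e. totmem-availmem (in GB if Torque reports kb)
# pmem:     Physical memory (in GB if Torque reports kb)
# mem:      Physical memory minus resident memory (in GB if Torque reports kb)
# ncpu:     Number of CPUs (the Torque "np" value)
# proc:     Number of processor slots allocated to jobs, followed by a usage bar
# queue:    The node "properties" value
# usrs:     Number of sessions / Number of users
# tasks:    Number of jobs found on the node
# NetMbit:  Network load in Mbit/s, relative to the recorded netload file
# jobids/users: Allocated slots, jobids and corresponding usernames of Torque jobs on this node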

#
# Show the Torque node status and parse the results
#

# pestat_report: read "pbsnodes -a" output on stdin and print a 1-line summary per node.
# Reads the shell variables set earlier in this script (listflagged, listdownnodes,
# listalljobs, NODEBLACKLIST, QUEUEBLACKLIST, colors, username, groupname, minjobs,
# NODENAMEFORMAT, QSTAT, queue, tstate, netloadprint, NETLOADFILE, NETLOADWRITE,
# netloadage, NETLOADTHRES, NETLOADSCALE) and hands them to awk.
# Every -v assignment is quoted so that empty values or values containing
# whitespace cannot shift the awk argument list.
# Returns the exit status of awk (1 when the selected user or group does not exist).
pestat_report()
{
    # $AWK is left unquoted on purpose so that a site may configure it with extra options.
    $AWK -v "listflagged=$listflagged" -v "listdownnodes=$listdownnodes" \
        -v "listalljobs=$listalljobs" -v "NODEBLACKLIST=$NODEBLACKLIST" -v "QUEUEBLACKLIST=$QUEUEBLACKLIST" \
        -v "colors=$colors" -v "username=$username" -v "groupname=$groupname" -v "minjobs=$minjobs" \
        -v "NODENAMEFORMAT=$NODENAMEFORMAT" \
        -v "QSTAT=$QSTAT" -v "queue=$queue" -v "tstate=$tstate" \
        -v "netloadprint=$netloadprint" -v "NETLOADFILE=$NETLOADFILE" -v "NETLOADWRITE=$NETLOADWRITE" \
        -v "netloadage=$netloadage" -v "NETLOADTHRES=$NETLOADTHRES" -v "NETLOADSCALE=$NETLOADSCALE" '
BEGIN {
    # Define terminal colors for the output if requested
    if (colors != 0) {
        # See http://en.wikipedia.org/wiki/ANSI_escape_code#Colors
        NONE=""
        NORMAL="\033[0m"
        BOLD="\033[1m"
        UNDERLINE="\033[4m"
        BLINK="\033[5m"
        REVERSE="\033[7m"
        CONCEALED="\033[8m"

        BLACK="\033[30m"
        RED="\033[31m"
        GREEN="\033[32m"
        YELLOW="\033[33m"
        BLUE="\033[34m"
        MAGENTA="\033[35m"
        CYAN="\033[36m"
        WHITE="\033[37m"

        ON_BLACK="\033[40m"
        ON_RED="\033[41m"
        ON_GREEN="\033[42m"
        ON_YELLOW="\033[43m"
        ON_BLUE="\033[44m"
        ON_MAGENTA="\033[45m"
        ON_CYAN="\033[46m"
        ON_WHITE="\033[47m"

        BEEP="\007"
    }
    # Conversion factor of netload (bytes) to Mbit/sec
    MBITSEC = 1000000*NETLOADSCALE/8

    # Get the list of jobids versus usernames from qstat
    QSTAT = QSTAT " -r"         # Append -r flag (running jobs) to qstat.
    while ((QSTAT | getline) > 0) {     # Parse lines from qstat -r
        if (++line>5) {         # Skip first 5 header lines
            split($1,b,".")     # Jobid is b[1]
            jobuser[b[1]] = $2  # Username of this jobid
        }
    }
    close(QSTAT)

    if (netloadprint == 1) {
        # Read previously recorded netload file (format: nodename network-bytes list-of-jobids)
        while ((getline < NETLOADFILE) > 0) {
            oldnetload[$1] = $2
            for (i=3; i<=NF; i++) oldjobidlist[$1] = oldjobidlist[$1] " " $i
        }
        if (NETLOADWRITE == 1) {
            # Truncate NETLOADFILE
            close(NETLOADFILE)
            print "" > NETLOADFILE
        }
    }

    if (username != "") {
        userselect=1            # We select only this username
        userfound=0
        # Look up the username in the passwd lines.
        # getline is compared with 0 so that a failing command (-1) cannot loop forever.
        while (("getent passwd" | getline) > 0) {
            split($0,b,":")         # Split password line into fields
            if (username == b[1]) userfound=1
        }
        close("getent passwd")
        if (userfound != 1) {
            print RED "ERROR: No such username:" NORMAL, username
            exit 1
        }
    } else if (groupname != "") {
        groupselect=1           # We have to select users in this groupname
        groupfound=0
        # Get the list of group names
        while (("getent group" | getline) > 0) {
            split($0,b,":")         # Split group line into fields
            group[b[3]] = b[1]      # Group name b[1] of this GID (b[3])
            if (groupname == b[1]) groupfound=1
        }
        close("getent group")
        if (groupfound != 1) {
            print RED "ERROR: No such groupname:" NORMAL, groupname
            exit 1
        }
        # Map every username to the name of its primary group
        while (("getent passwd" | getline) > 0) {
            split($0,b,":")         # Split password line into fields
            gidname[b[1]] = group[b[4]]   # Group name of this GID (numeric group id)
        }
        close("getent passwd")
    }

    # Nodes that are never listed
    nodenum = split(NODEBLACKLIST, nodelist, ":")
    for(i = 1; i <= nodenum; ++i) {
      BLACKNODE[nodelist[i]] = nodelist[i]
    }

    # Queues (node properties) that are never listed
    queuenum = split(QUEUEBLACKLIST, queuelist, ":")
    for (i = 1; i <= queuenum; ++i) {
      BLACKQUEUE[queuelist[i]] = queuelist[i]
    }

    # Print a header line ("%%" yields the literal percent sign of the load column)
    printf (NODENAMEFORMAT, YELLOW, "Node")
    printf (" %sstate    load%%     resi      pmem      mem  ncpu  proc                         queue    usrs tasks NetMbit jobids/users %s\n", YELLOW, NORMAL)
}
#
# Parse the output of pbsnodes
#
NF==1 { node=$1             # 1st line is nodename
    nodename[node] = node       # Node name
    numjobs[node] = 0               # Torque jobs on the node
    numtasks[node] = 0              # Value printed in the tasks column
    listnode=0          # Set to > 0 if this node gets flagged
    userusesnode=0          # If this node is used by the selected user
    groupusesnode=0         # If this node is used by a user in the selected group
    allocated = 0           # Processor slots allocated to jobs on this node
    njob = 0
    # Read the "key = value" lines of this node.  The loop stops at the first line
    # with fewer than 3 fields (normally the blank separator line) or at end of input.
    while ((getline) > 0 && NF >= 3) {
        if ($1 == "state") {
            if ($3 == "job-exclusive")          state[node] = "excl"
            else if ($3 == "job-exclusive,busy")        state[node] = "busy"
            else if ($3 == "busy")              state[node] = "busy"
            else if ($3 == "free")              state[node] = "free"
            else if ($3 == "offline")           state[node] = "offl"
            else if ($3 == "offline,job-exclusive")     state[node] = "offl"
            else if ($3 == "offline,job-exclusive,busy")    state[node] = "offl"
            else if ($3 == "offline,busy")          state[node] = "offl"
            else if ($3 == "down")              state[node] = "down"
            else if ($3 == "down,offline")          state[node] = "offl"
            else if ($3 == "down,job-exclusive")        state[node] = "down"
            else if ($3 == "down,offline,job-exclusive")    state[node] = "offl"
            else if ($3 == "down,offline,busy")     state[node] = "offl"
            else if ($3 == "down,offline,job-exclusive,busy")   state[node] = "offl"
            else if ($3 == "UNKN")              state[node] = "UNKN"
        }
        else if ($1 == "np")        np[node] = $3
        else if ($1 == "properties")    properties[node] = $3
        else if ($1 == "ntype")     ntype[node] = $3
        else if ($1 == "jobs") {
            # Join all value fields so that a job list written with blanks
            # after the commas is parsed completely as well.
            joblist = $3
            for (k = 4; k <= NF; k++) joblist = joblist $k
            ntjobs = split(joblist, tjobs, ",")
            tmp_ppn = 0
            # Walk the entries in their original order: slots listed without a jobid
            # are carried in tmp_ppn and credited to the next entry that names a jobid.
            for (i = 1; i <= ntjobs; i++) {
                if (tjobs[i] == "") continue    # Skip empty entries
                split(tjobs[i], jobinfo, "/")
                split(jobinfo[1], pids, "-")
                if (2 in pids) {
                    ppn = pids[2] - pids[1] + 1     # Slot range first-last
                } else {
                    ppn = 1             # Single slot
                }
                if (2 in jobinfo) {
                    split(jobinfo[2], d, ".")
                    tjobid = d[1]           # Jobid without the server name
                    jobres[tjobid] += ppn + tmp_ppn
                    tmp_ppn = 0
                } else {
                    tmp_ppn += ppn
                }
                allocated += ppn
            }

            for (tjobid in jobres) {
                user = jobuser[tjobid]
                # Case where the node pbs_mom has a (dead job) jobid unknown to pbs_server:
                if (length(user) == 0) {    # Flag non-existent username
                    user=""
                    usercolor=NORMAL
                    listnode++
                } else
                    usercolor=RED
                # Append slots, jobid and username to the job list
                if (listalljobs == 1 || length(user) != 0) {
                    jobiduserlist[node] = jobiduserlist[node] " " usercolor jobres[tjobid] "/" tjobid " " user NORMAL ","
                }
                # Record the jobid of this iteration
                jobidlist[node] = jobidlist[node] " " tjobid
                # If this node is used by the selected user
                if (userselect==1 && user == username) userusesnode=1
                # If this node is used by a user in the selected group
                if (groupselect==1 && gidname[user] == groupname) groupusesnode=1
                njob++
            }
            numtasks[node] = njob
            numjobs[node] = njob        # Needed by the minjobs selection below

            delete jobres
        }
        else if ($1 == "status") {
            # Get the node status subfields
            split (substr($0,15), a, ",")   # Remove leading "status =", split subfields separated by ","
            for (field in a) {      # Process individual status subfields
                split(a[field],b,"=")   # Split var=value fields
                if (b[1]=="arch")       arch[node]=b[2]
                else if (b[1]=="opsys")     opsys[node]=b[2]
                else if (b[1]=="sessions")  sessions[node]=b[2]
                else if (b[1]=="nsessions") nsessions[node]=int(b[2])
                else if (b[1]=="nusers")    nusers[node]=b[2]
                else if (b[1]=="idletime")  idletime[node]=b[2]
                else if (b[1]=="totmem")    totmem[node]=b[2]
                else if (b[1]=="availmem")  availmem[node]=b[2]
                else if (b[1]=="physmem")   physmem[node]=b[2]
                else if (b[1]=="ncpus")     ncpus[node]=b[2]
                else if (b[1]=="loadave")   loadave[node]=b[2]
                else if (b[1]=="netload")   netload[node]=b[2]
                else if (b[1]=="size")      size[node]=b[2]
                else if (b[1]=="rectime")   rectime[node]=b[2]
                else if (b[1]=="message")   msg[node]=b[2]
            }
        }
    }

    if (NETLOADWRITE == 1) {
        # Save netload information to file (format: nodename network-bytes list-of-jobids)
        print nodename[node], netload[node], jobidlist[node] >> NETLOADFILE
    }
    if (netloadprint == 1 && oldnetload[node] > 0) {
        netloadflag=" "
        # Calculate delta-netload
        netload[node] = netload[node] - oldnetload[node]
        if (netload[node] < 0) {
            netload[node] = -1  # Negative values are bad (could be due to recent node reboot)
            netloadflag="*"
        }
        # Convert netload to Mbit/sec; avoid a fatal division by zero
        if (netloadage > 0 && MBITSEC > 0)
            netmbit=netload[node]/(netloadage*MBITSEC)
        else
            netmbit=0
        if (netmbit > NETLOADTHRES) {
            netmbitcolor=RED    # Netload > NETLOADTHRES is flagged
            listnode++
        } else {
            netmbitcolor=NORMAL
        }
        if (jobidlist[node] != oldjobidlist[node]) {
            netloadflag="!"     # List of jobids has changed: flag this
        }
    } else {
        netload[node] = 0       # No data available
        netloadflag="*"
        # Reset the printed values so that nothing is carried over from the previous node
        netmbit=0
        netmbitcolor=NORMAL
    }

    # Is this node used by the selected user? Otherwise skip printout.
    if (userselect==1 && userusesnode==0) next
    # Is this node used by a user in the selected group? Otherwise skip printout.
    if (groupselect==1 && groupusesnode==0) next
    # If this node runs less than minjobs jobs
    if (numjobs[node] < minjobs) next
    # Is this node in the target queue?
    if (queue != "" && properties[node] != queue) next
    # Is this node in the target state?
    if (tstate != "" && state[node] != tstate) next

    # Is this node or its queue in the blacklist?
    if (node in BLACKNODE) next
    if (properties[node] in BLACKQUEUE) next

    # Print out values that we are interested in.  Flag unexpected values with a "*".

    # Flag nodes with status down, offline or unknown
    if (state[node] == "free") {
        stateflag=" "
        statecolor=GREEN
    } else if (state[node] == "busy") {
        stateflag="*"
        statecolor=CYAN
    } else if (state[node] == "excl") {
        stateflag="*"
        statecolor=RED
    } else if (state[node] == "offl") {
        stateflag="*"
        statecolor=ON_RED
    } else if (state[node] == "down" || state[node] == "UNKN") {
        stateflag="*"
        statecolor=NORMAL
        listnode++
    } else {
        # Unrecognized state string: do not reuse flag and color of the previous node
        stateflag="*"
        statecolor=NORMAL
    }

    # Flag unexpected CPU load average
    loaddiff = loadave[node] - numtasks[node]
    if (loaddiff > 2 || loaddiff < -2) {
        loadflag="*"
        loadcolor=RED
        cpucolor=GREEN
        listnode++
    } else if (loaddiff > 0.5 || loaddiff < -0.5) {
        loadflag="*"
        loadcolor=MAGENTA
        cpucolor=GREEN
        listnode++
    } else {
        loadflag=" "
        loadcolor=NORMAL
        cpucolor=NORMAL
    }

    # Remove "kb" unit from memory sizes
    sub("kb", "", totmem[node])
    sub("kb", "", availmem[node])
    sub("kb", "", physmem[node])
    # Resident memory
    resi = (totmem[node]-availmem[node])/1024
    aval = physmem[node]/1024 - resi
    if (resi > 50 && resi > physmem[node]/1024 + 1024) {    # Very high memory usage
        resiflag="*"
        resicolor=RED
        pmemcolor=GREEN
        listnode++
    } else if (resi > 50 && resi > physmem[node]/1024 - 50) {   # High memory usage
        resiflag="*"
        resicolor=MAGENTA
        pmemcolor=GREEN
        listnode++
    } else {
        resiflag=" "
        resicolor=NORMAL
        pmemcolor=NORMAL
    }

    # Flag unexpected number of processes or users
    if (nsessions[node] > 2*ncpus[node] + 1) {  # More than 2 sessions per job
        sessflag="*"
        sesscolor=RED
        listnode++
    } else if (nusers[node] > ncpus[node]) {    # More users than nCPUs is bad
        sessflag="*"
        sesscolor=RED
        listnode++
    } else {
        sessflag=" "
        sesscolor=NORMAL
    }

    # Flag unexpected number of jobs
    if (numjobs[node] > numtasks[node]) {       # Should be at least 1 task per job
        jobflag="*"
        jobcolor=RED
        listnode++
    } else {
        jobflag=" "
        jobcolor=NORMAL
    }

    # Listing of down nodes?
    if (listdownnodes == 0 && state[node] == "down") listnode=0

    # Progressbar: 20 characters wide, filled in proportion to allocated/np.
    # A missing or zero np value gives an empty bar instead of a division by zero.
    if (np[node] > 0)
        nprog = int(20 * allocated / np[node])
    else
        nprog = 0
    fill = "...................."
    bar = "####################"

    # Print a 1-line list for this node
    if (!listflagged || listnode > 0) {
        printf (NODENAMEFORMAT, NORMAL, node)
        printf (" %s%s%1s%s ", statecolor, state[node], stateflag, NORMAL)
        printf (" %s%6.2f%1s%s", loadcolor, loadave[node], loadflag, NORMAL)
        printf (" %s%8.2f%1s%s", statecolor, resi/1024, resiflag, NORMAL)
        printf (" %s%8.2f%s", statecolor, physmem[node]/1024/1024, NORMAL)
        printf (" %s%8.2f%s", statecolor, aval/1024, NORMAL)
        printf (" %s%5d%s", statecolor, np[node], NORMAL)
        printf (" %s%5d%s", statecolor, allocated, NORMAL)
        printf (" %s[%s%s]%s ", statecolor, substr(bar, 1, nprog), substr(fill, 1, 20 - nprog), NORMAL)
        printf (" %s%-8s%s", NORMAL, properties[node], NORMAL)
        printf (" %s%2d/%-2d%1s%s", sesscolor, nsessions[node], nusers[node], sessflag, NORMAL)
        printf (" %s%3d%1s%s", jobcolor, numtasks[node], jobflag, NORMAL)
        if (netloadprint == 1) {
            printf (" %s%5d%1s%s", netmbitcolor, netmbit, netloadflag, NORMAL)
        } else
            printf ("    -  ")
        if (msg[node] != "") {
            printf (" %s\n", msg[node])
        } else {
            # Strip the trailing "," of the job list
            printf ("%s\n", substr(jobiduserlist[node], 1, length(jobiduserlist[node])-1))
        }
    }
}'
}

# $PBSNODES is left unquoted on purpose so that a site may configure it with extra options.
$PBSNODES -a | pestat_report

