#!/bin/sh
#
# /secure/check_disk
#
# Function:
#   If a filesystem is full or the system is heavily loaded, alert
#   $admin ONCE.  A flag file is set so the alert is not repeated; a
#   second mail is sent when the problem has gone away again.
#
# Usage:
#   check_disk DISK_SPACE_% LOAD_LIMITx100
#     DISK_SPACE_%    alert when a filesystem is fuller than this percentage
#     LOAD_LIMITx100  alert when (5-minute load average * 100) exceeds this
#   Both arguments are required and must be non-negative integers.
#
# Example cron usage:
#   0 8-18 * * 1-5 /secure/check_disk 99 200
#
# Configuration:
#   $config is a colon-separated file; the mail recipient is field 4 of
#   the line whose field 1 equals $group and field 2 equals $tool.
#
# Environment:
#   DEBUG   if non-empty, progress messages are printed to stdout.
#
# Exit status:
#   1 on usage error, unsupported operating system or temp file failure.
#
# Portability note: this script targets very old /bin/sh implementations
# as well as modern ones, so it deliberately sticks to backticks and avoids
# $(...), "local", "if !" and similar later additions.
#
# 2005.10.02 SB: Major Update. Fix & test for HP-UX,
#            Suse 9.1/9.3, Solaris 8/9, RHEL3.o
# 2001.12.13 Sean Boran for Solaris.
#########################################################

#DEBUG=1

## read in settings from config file
config="/secure/secure.conf"
#group=`uname -n`
group=default
tool="check_disk"

PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin

# Ignore these filesystems (extended regex matched against each mount line)
ignore="nfs|shm|smb|hsfs|cdfs|subfs|devpts|usbdevfs|sysfs|/dev/fd|mnttab|/proc"
subject="Disk/performance problems"
subject2="Disk/performance FIXED - OK"
# The flag file records that an alert was already sent.
flagfile=/tmp/chek_disk.flag
USAGE="USAGE: $0 DISK_SPACE_% LOAD_LIMITx100"

##---------- functions -------

# Print a progress message (with newline) when DEBUG is set.
debug() {
  if [ -n "$DEBUG" ]; then
    printf '%s\n' "$*"
  fi
}

# Like debug, but without the trailing newline.
debug_n() {
  if [ -n "$DEBUG" ]; then
    printf '%s' "$*"
  fi
}

# Succeed only if $1 is a non-empty string of decimal digits.
is_number() {
  case "$1" in
    '' | *[!0-9]*) return 1 ;;
    *) return 0 ;;
  esac
}

# Print the 5-minute load average multiplied by 100, or nothing if it
# cannot be found.  The value is located relative to the "load average(s):"
# label because its comma-separated position changes with the uptime.
# LC_ALL=C keeps the decimal separator a dot.
current_load() {
  LC_ALL=C uptime |
    sed -n -e 's/,/ /g' -e 's/.*load average[s]*: *//p' |
    awk '{ print int(100 * $2 + 0.5) }'
}

# Print the usage percentage (digits only) of filesystem $1, or nothing
# if df reports no percentage (e.g. "-" for pseudo filesystems).  The
# field is found by its "NN%" shape so wrapped df lines are handled too.
fs_usage() {
  $df "$1" |
    awk 'NR > 1 {
      for (i = 1; i <= NF; i++)
        if ($i ~ /^[0-9]+%$/) { print $i + 0; exit }
    }'
}

# Mail stdin to $admin with subject $1.  Returns non-zero if there is no
# recipient or the mailer fails.
send_mail() {
  if [ -z "$admin" ]; then
    echo "$0: no admin address for $group:$tool in $config" >&2
    return 1
  fi
  # $admin is deliberately unquoted: it may hold several recipients.
  $mailit -s "$1" $admin
}

##---------- arguments -------

fs_limit="$1"
load_limit="$2"
if is_number "$fs_limit" && is_number "$load_limit"; then
  :
else
  printf '%s\n' "$USAGE" >&2
  exit 1
fi

##---------- per-OS commands -------

os=`uname -s`
host=`uname -n`
case "$os" in
  SunOS)
    df='df -lk'
    top="top -b"
    mailit=/bin/mailx
    mount="mount -p"
    egrep="egrep"
    ;;
  HP-UX)
    df='bdf'
    top=""
    mailit=/bin/mailx
    mount="mount"
    egrep="egrep"
    ;;
  Linux)
    df='df -P'
    top="top -b -n 1"
    mailit=/bin/mail
    mount="mount"
    # egrep is obsolescent in GNU grep and warns on stderr.
    egrep="grep -E"
    ;;
  OpenBSD)
    df='df -hlk'
    top=""
    mailit=/bin/mailx
    mount="mount"
    egrep="grep -E"
    ;;
  *)
    echo "$0: unsupported operating system: $os" >&2
    exit 1
    ;;
esac

# Fall back to another common mailer location if the default is missing.
if [ ! -x "$mailit" ]; then
  for candidate in /usr/bin/mailx /bin/mailx /usr/bin/mail /bin/mail; do
    if [ -x "$candidate" ]; then
      mailit=$candidate
      break
    fi
  done
fi

admin=""
if [ -r "$config" ]; then
  admin=`awk -F: '$1 == g && $2 == t { print $4 }' g="$group" t="$tool" "$config"`
fi

##---------- temp file -------

# Prefer mktemp; where it is missing or has a different syntax, fall back
# to the PID-based name but create it privately.
tempfile=`mktemp /tmp/chek_disk.XXXXXX 2>/dev/null`
if [ -z "$tempfile" ]; then
  tempfile=/tmp/chek_disk.$$
  rm -f "$tempfile"
  (umask 077; : > "$tempfile") || {
    echo "$0: cannot create $tempfile" >&2
    exit 1
  }
fi
# Always remove the temp file, whichever way the script ends.
trap 'rm -f "$tempfile"' 0
trap 'exit 1' 1 2 15

## Check filesystems for percentage full > $fs_limit
for filesys in `$mount | $egrep -v "$ignore" | awk '{print $3}'`; do
  debug_n "Checking $filesys, "
  fs_space=`fs_usage "$filesys"`
  debug "space=$fs_space"
  if is_number "$fs_space"; then
    if [ "$fs_space" -gt "$fs_limit" ]; then
      # Append, so every full filesystem is reported, not just the last.
      printf 'Filesystem %s is %s %% full\n' "$filesys" "$fs_space" >> "$tempfile"
      debug "Filesystem $filesys is $fs_space % full"
    fi
  fi
done

## check system load over last 5 minutes
load=`current_load`
if is_number "$load"; then
  if [ "$load" -gt "$load_limit" ]; then
    echo "System under heavy load (greater than $load_limit)..." >> "$tempfile"
    debug "System under heavy load (greater than $load_limit)..."
  else
    debug "Current load over 5 mins=$load"
  fi
else
  echo "$0: could not determine load average from uptime output" >&2
fi

## Were any problems found above? If yes, email results
if [ -s "$tempfile" ]; then
  debug_n "problem found.."
  ## have we reported a problem already?
  ## if not, report it and set flag
  if [ ! -f "$flagfile" ]; then
    debug " send alert email."
    # add in some more info to help the admin
    {
      echo " "
      $df
      echo " "
      echo "Load:"
      uptime
      echo " "
      $top
      echo " "
      ps -ef
      echo " "
      printf '\n\nThis email was created by: %s %s\n' "$host" "$0"
    } >> "$tempfile"
    # Only set the flag once the alert really went out, so a failed
    # delivery is retried on the next run.
    if send_mail "$host $subject" < "$tempfile"; then
      touch "$flagfile"
    else
      echo "$0: could not send alert mail; will retry on next run" >&2
    fi
  else
    debug " but no action (alert previously sent)."
  fi
else
  # no problem
  if [ -f "$flagfile" ]; then
    # there was a problem on the last run.
    debug "Previous problem fixed- send email."
    echo "previous problem fixed" | send_mail "$host $subject2" ||
      echo "$0: could not send 'fixed' mail" >&2
    # Clear the flag regardless, so a future problem alerts again.
    rm -f "$flagfile"
  else
    debug "no problem."
  fi
fi