Advertisement
Guest User

Untitled

a guest
Jul 26th, 2016
74
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Bash 22.61 KB | None | 0 0
  1. #!/bin/bash
  2. # 2015-2016 by ghost75 v1.61
  3. # Tested on Nas4free and Linux.
  4. maxusedperc="80"
  5. minfreegig="20" #only Gigabytes and digits allowed
  6. maxwarncountperday="1" #how many disk space warnings per day? 0 to disable space warning mail
  7. scrubexpire="1728000" #after how many seconds scrub will expire: 7d x 24h x 3600s = 604800
  8. scrubrunhour="22" #run scrub off peak time, 24h time format
  9. spacewarncountfile="/tmp/spacewarn_count.log" #to get track when to send mail
  10. dayfile="/tmp/spacewarn_day.log" #to store actual day
  11. poolmailfile="/tmp/pool.log" #log related to zfs pool
  12. smarterrorfile="/tmp/smarterrors.log" #log includes smart attribute and unhealthy state if disk has error
  13. smartcache="/tmp/smartcache.log" #stores output of smartctl
  14.  
  15. #smartctl device options
  16. sdc="sat"
  17.  
  18. #email address needs only to be set if you dont want to use the values from
  19. #Nas4Free WebUI (System|Advanced|Email and Disk|Management|Smart or Status|Email Report)
  20. #of if you have non NAS4Free system
  21. email_to="root" #required on non Nas4free
  22. #email_from=""
  23.  
  24. ############################################################
  25.  
  26. PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
  27. source $HOME/.profile
  28.  
  29. send_mail() {
  30. if [ "$platform" == "FreeBSD" ] && [ ! -z $config ]; then
  31.     printf "From:$email_from\nTo:$email_to\nSubject:$subject\n\n$body" | /usr/local/bin/msmtp --file=/var/etc/msmtp.conf -t
  32. elif [ "$platform" == "Unix" ] || [ "$platform" == "Linux" ] || [ "$platform" == "FreeBSD" ]; then
  33.     if [ ! -z "$email_to" ]; then
  34.         mail -s "$subject" "$email_to" <<< "$body"
  35.     else
  36.         echo "Cannot send mail because parameter email_to is missing, will try to send again next time" >&2
  37.     fi
  38. fi
  39. }
  40.  
  41. run_help() {
  42. echo -e "\n Monitor your ZFS pools\n"
  43. echo " Script will monitor disk space and send email if thresholds are reached,"
  44. echo " it will send mail only a specific amount of times per day, which can be set"
  45. echo " by parameter."
  46. echo " Also there will be an email for any zfs pools which are not in online state"
  47. echo " (i.e. degraded) or those with chksum errors. There will be an email once for"
  48. echo " every new error after the old error is fixed."
  49. echo " Scrub will be also taken place on a regular basis (see scrubexpire parameter)"
  50. echo " if there are no chksum errors, scrub errors from last run and if pool is ONLINE"
  51. echo " and not in resilver state."
  52. echo " S.M.A.R.T. attributes id5,10,196,197,198 will be monitored for any raw value > 0."
  53. echo " There will be email for every new disk error but no reoccuring mail if the"
  54. echo -e " error stays the same. If the script cannot send mail, it will try next time.\n"
  55. echo "   The following parameters are supported:"
  56. echo "    -e scrubexpire, how many seconds there will be no new scrub"
  57. echo "    -f email_from"
  58. echo "    -g minfreegig, for disk space monitoring"
  59. echo "    -p maxusedperc, for disk space monitoring"
  60. echo "    -r scrubrunhour, in 24h format"
  61. echo -e "    -t email_to \n"
  62. echo " It can be scheduled with cron to run like every 5min i.e.:"
  63. echo " */5 * * * * root /usr/local/sbin/zfshealth"
  64. echo -e " You may need also to set PATH variable in cron file.\n"
  65.  
  66. exit 0
  67. }
  68.  
  69. #get parameters if specified
  70. while getopts e:f:g:hp:r:t: option; do
  71.         case "${option}" in
  72.                 e) scrubexpire=${OPTARG};;
  73.                 f) email_from=${OPTARG};;
  74.                 g) minfreegig=${OPTARG};;
  75.                 h) run_help;;
  76.                 p) maxusedperc=${OPTARG};;
  77.                 r) scrubrunhour=${OPTARG};;
  78.                 t) email_to=${OPTARG};;
  79.         esac
  80. done
  81.  
  82. ##which platform are we?
  83. if [ "$(uname)" == "Linux" ]; then
  84.     platform="Linux"
  85.     myhostname=$(hostname)
  86. elif [ "$(uname)" == "FreeBSD" ]; then
  87.     platform="FreeBSD"
  88.     myhostname=$HOST
  89. elif [ "$(uname)" == "Unix" ]; then
  90.     platform="Unix"
  91.     myhostname=$HOST
  92. else
  93.     platform="unknown"
  94.     myhostname="unknown"
  95.     echo "OS is not supported, exit here"
  96.     exit 1
  97. fi
  98.  
  99. ###########################
  100.  
  101. # Checking days (for limiting space warning mails)
  102.  
  103. daynow=$(date +"%d")
  104.  
  105. #if file doesnt exist, put date there
  106. if [ ! -f "$dayfile" ]; then
  107.         echo $daynow > $dayfile
  108. fi
  109.  
  110. #get day from file to know when the last runtime was
  111. read -r dayfromfile <<< $(cat $dayfile)
  112. #write actual day into file
  113. echo $daynow > $dayfile
  114.  
  115. #######################
  116.  
  117. #this is used for scrub expire
  118. currentscrubdate=$(date +"%s")
  119.  
  120. #######################
  121.  
  122. # Find out if its nas4free embedded or full install in order to get email address
  123. if [ -f /etc/platform ] && [ "$platform" == "FreeBSD" ]; then
  124.     if [[ $(grep embedded /etc/platform) ]]; then
  125.         config="/cf/conf/config.xml"  # For embedded installs
  126.         # Add a test to remount /cf if there is problems
  127.         if [[ ! -f $config ]]; then
  128.         umount /cf && mount /cf
  129.         fi
  130.  
  131.     else
  132.         config="/conf/config.xml"  # For full installs
  133.     fi
  134.  
  135.     # Email parameters
  136.     if [ -z "$email_to" ] && [ -f $config ]; then
  137.         email_to=$(/usr/local/bin/xml sel -t -v "//smartd/email/to" $config)
  138.     fi
  139.     if [ -z "$email_from" ] && [ -f $config ]; then
  140.         email_from=$(/usr/local/bin/xml sel -t -v "//email/from" $config)
  141.     fi
  142. fi
  143.  
##############################################################################################################
## Check ZFS pools for health, scrub and disk space
##
## NOTE: the while loop below runs in a subshell (right-hand side of a pipe),
## so variables set inside it vanish after "done"; all state that must survive
## between cron runs lives in the /tmp files derived from $poolmailfile and
## $spacewarncountfile.

# Get pool name, free space, used capacity and state (4 tab-separated columns)
zpool list -H -o name,free,cap,health | while read output
do
        ################################
        # get basic stuff like pool name and health
        pool=$(echo $output | awk '{ print $1 }')
        health=$(echo $output | awk '{ print $4 }')
        if [ "$health" == "ONLINE" ]; then
        echo -e "ZFS pool $pool state is \e[32m${health}\e[0m"
        else
        echo -e "ZFS pool $pool state is \e[31m${health}\e[0m"
        fi

        ##################################
        # files for pool errors, health and scrub state
        # (split $poolmailfile into dir / basename / extension, then build
        # one state file per pool and per concern)
        poolpath="${poolmailfile%/*}"
        poolfilename=$(basename "$poolmailfile")
        poolfilename="${poolfilename%.*}"
        poolext="${poolmailfile##*.}"
        poolmailfilehealth="${poolpath}/${poolfilename}-${pool}-health.${poolext}"
        poolmailfilescrub="${poolpath}/${poolfilename}-${pool}-scrub.${poolext}"
        poolfilescrubstate="${poolpath}/${poolfilename}-${pool}-scrubstate.${poolext}"
        poolfilechksumstate="${poolpath}/${poolfilename}-${pool}-chksumstate.${poolext}"
        # files for disk space warnings
        spacewarnpath="${spacewarncountfile%/*}"
        spacewarnfilename=$(basename "$spacewarncountfile")
        spacewarnfilename="${spacewarnfilename%.*}"
        spacewarnext="${spacewarncountfile##*.}"
        spacewarncountfilenew="${spacewarnpath}/${spacewarnfilename}-${pool}.${spacewarnext}"

        # create if doesnt exist
        if [ ! -f "$spacewarncountfilenew" ]; then
            echo 0 > $spacewarncountfilenew
        fi

        ###################################
        # see if there are chksum errors: keep device lines whose concatenated
        # read/write/chksum counters (columns 3-5) are not "000"
        if [ ! -f "$poolfilechksumstate" ]; then
            touch $poolfilechksumstate
        fi
        read -r poolchksumstateold <<< $(cat $poolfilechksumstate)
        zpool status ${pool} | grep ONLINE | grep -v state | awk '{print $3 $4 $5}' | grep -v 000 > $poolfilechksumstate
        read -r poolchksumstate <<< $(cat $poolfilechksumstate)
        if [ "${poolchksumstate}" ]; then
            # NOTE(review): plain echo (no -e) prints the \e[31m codes literally,
            # and the message shows the state FILE name rather than the pool name
            echo "Warning: \e[31m${poolfilechksumstate}\e[0m ZFS Chksum errors found!"
            if [ "${poolchksumstate}" != "${poolchksumstateold}" ]; then
                subject="ZFS pool $pool server $myhostname checksum errors found"
                # NOTE(review): $poolchksumerrors is never assigned anywhere in this
                # script (only poolchksumstate is), so it expands to nothing here
                body="Warning: ZFS pool $pool has $poolchksumerrors checksum errors. Please check your disks. There will be no more automatic scrub on this pool, until this is fixed."
                send_mail
                # on mail failure remove the state file so the mail is retried next run
                if [ $? -ne 0 ]; then rm $poolfilechksumstate; fi
            else
                echo "Mail about chksum errors was already sent"
            fi
        else
            echo -e "\e[32m0\e[0m ZFS chksum errors found on pool $pool"
        fi

        #######################################
        # monitor scrub and issue if needed (doesnt run when pool is not online,
        # scrub or resilver is in place, chksum errors were found or it was never run before)
        # NOTE(review): the error message in the outer else says "parameter
        # missing", but || lets the section run when EITHER parameter is set
        if [ "$scrubrunhour" ] || [ "$scrubexpire" ]; then

        if [ ! -f "$poolmailfilescrub" ]; then
            echo "0" > $poolmailfilescrub
        fi
        #get pool scrub mail state from last run, if not 0 then mail was sent already
        read -r poolmailscrub <<< $(cat $poolmailfilescrub)

        if [ ! -f "$poolfilescrubstate" ]; then
            touch $poolfilescrubstate
        fi

        #get scrub state from last run
        read -r scrubstateold <<< $(cat $poolfilescrubstate)
        #get actual scrub state (the one matching line of zpool status)
        zpool status $pool | egrep "none requested|resilver|scrub in progress|scrub repaired" > $poolfilescrubstate
        read -r scrubstate <<< $(cat $poolfilescrubstate)

        if [ $(echo $scrubstate | egrep -c "none requested|resilvered") -ge 1 ]; then
            echo "ZFS scrub was not run before on ${pool} or it was resilvered, cannot monitor or issue new scrubs automatically"
            if [ "$health" == "ONLINE" ] && [ "$poolmailscrub" != "1" ]; then #if drive is online but scrub cannot be scheduled
                echo "Cannot schedule scrub"
                subject="ZFS pool $pool server $myhostname cannot schedule scrub"
                body="Warning: ZFS pool $pool is ONLINE but cannot be scrubed automatically, please run scrub manually one time."
                send_mail
                if [ $? -eq 0 ]; then echo "1" > $poolmailfilescrub; fi
            elif [ "$health" == "ONLINE" ] && [ "$poolmailscrub" == "1" ]; then
                echo "Cannot schedule scrub, mail was already sent"
            fi
        elif [ "$health" == "ONLINE" ] && [ "$poolmailscrub" != "0" ]; then
            # scrub state became readable again after an earlier "cannot schedule" mail
            echo "ZFS scrub check is working now"
            subject="ZFS pool $pool on server $myhostname scrub check is ok now"
            body="Notice: ZFS pool $pool will be scrubed automatically from now on"
            send_mail
            echo "0" > $poolmailfilescrub # reset counter
        elif [ $(echo $scrubstate | egrep -c "scrub repaired 0") -ge 1 ] && [ "$scrubstate" != "$scrubstateold" ] && [ "$scrubstateold" ]; then
            # state line changed and last scrub repaired 0 errors -> scrub just finished
            # NOTE(review): the bodies below say "pool $myhostname"; $pool was probably intended
            echo "ZFS scrub finished on ${pool}"
            subject="ZFS pool $pool on server $myhostname scrub finished"
            body="Scrub finished on pool $myhostname with result $scrubstate"
            send_mail
        elif [ $(echo $scrubstate | egrep -c "scrub repaired") -ge 1 ] && [ "$scrubstate" != "$scrubstateold" ] && [ "$scrubstateold" ]; then
            echo "ZFS scrub finished on ${pool} with errors"
            subject="ZFS pool $pool on server $myhostname scrub finished with errors"
            body="Scrub finished on pool $myhostname with errors. There will be no subsequent scrub until error is cleared out. Result: $scrubstate"
            send_mail
        elif [ $(echo $scrubstate | egrep -c "scrub in progress|resilver") -ge 1 ]; then
            echo "ZFS scrub or resilver is already in progress on ${pool}, cannot schedule scrub"
        elif [ "$health" != "ONLINE" ]; then
            echo "Cannot schedule scrub, because ZFS pool $pool is not online."
        elif [ "${poolchksumerrors}" ]; then
            # NOTE(review): $poolchksumerrors is never set, so this branch is
            # unreachable; $poolchksumstate appears to be what was intended
            echo "Cannot scrub, because ZFS pool $pool has chksum errors."
        elif [ $(echo $scrubstate | egrep -c "scrub repaired 0") -ge 1 ] && [ "$health" == "ONLINE" ]; then
            #everything ok, scrub can be scheduled
            # extract the completion timestamp from the "scrub repaired" line;
            # awk field positions depend on each OS's zpool status wording
            if [ "$platform" == "FreeBSD" ] || [ "$platform" == "Unix" ]; then
                #scrubrawdate=$(zpool status $pool | grep scrub | awk '{print $15 $12 $13}')
                scrubrawdate=$(echo $scrubstate | grep "scrub repaired" | awk '{print $15 $12 $13}')
                scrubdate=$(date -j -f '%Y%b%e-%H%M%S' $scrubrawdate'-000000' +%s)
            elif [ $platform == "Linux" ]; then
                #scrubrawdate=$(zpool status $pool | grep scrub | awk '{print $11" "$12" " $13" " $14" "$15}')
                scrubrawdate=$(echo $scrubstate | grep "scrub repaired" | awk '{print $11" "$12" " $13" " $14" "$15}')
                scrubdate=$(date -d "$scrubrawdate" +%s)
            fi

            # start a scrub only when the last one is older than $scrubexpire
            # AND the current hour matches the configured off-peak hour
            if [ $(($currentscrubdate - $scrubdate)) -ge $scrubexpire ] && [ ! -z "$scrubdate" ]; then
                echo "Scrub expired on ZFS pool ${pool}"
                if [ "$(date +%H)" == "$scrubrunhour" ]; then
                    subject="ZFS pool $pool on server $myhostname scrub started"
                    body="Scrub started on pool $myhostname because it was expired and scheduled for ${scrubrunhour}:00h. Please do not reboot until scrub is finished. The last scrub state was $scrubstate"
                    send_mail
                    zpool scrub $pool
                else
                    echo "ZFS scrub will be scheduled for ${scrubrunhour}:00h"
                fi
            else
                if [ ! -z "$scrubdate" ]; then
                    echo "ZFS scrub is not expired yet on pool $pool"
                else
                    echo "ZFS scrub on pool $pool cant be checked because unknown OS or cant get date"
                fi
            fi
        else
            echo "Something went wrong with scrub on pool $pool, could not get scrub state"
        fi

        else
            echo "Cannot monitor scrub because scrubrunhour or scrubexpire parameter missing"
        fi

        #########################################
        #for disk space warnings

        if [ "$minfreegig" ]; then

            #get free gig space from zfs list because zpool doesnt show correct value on raidz
            capfreegigfullstring=$(zfs list -H -o avail $pool)
            capfreegig=$(echo $capfreegigfullstring | sed 's/.$//' | tr . ,) #remove last string and replace all dot with comma
            if [ "${capfreegigfullstring: -1}" = "T" ]; then
                capfreegig=$((capfreegig*1024)) #convert T to G, takes only comma as decimal separator
            fi
            capfreegig=$(echo $capfreegig | tr , . | xargs printf "%.*f\n" 0) #round value, beware: takes only dot as decimal separator
            capusedperc=$(echo $output | awk '{ print $3 }' | sed 's/.$//' | xargs printf "%.*f\n" 0)

            if [ "$capusedperc" -gt "$maxusedperc" ] || [ "$capfreegig" -lt "$minfreegig" ]; then #doesnt work with decimal numbers

                #if day from file is not today then we need to reset counter
                # NOTE(review): both operands are unquoted; safe only while the
                # day file really contains a single two-digit day value
                if [ $daynow != $dayfromfile ]; then
                    echo 0 > $spacewarncountfilenew
                fi

                read -r warncount <<< $(cat $spacewarncountfilenew)
                warncount=$((warncount+1))
                echo $warncount > $spacewarncountfilenew

                if [ "$maxwarncountperday" -ge "$warncount" ]; then
                    echo -e "Disk space on ZFS pool $pool is in \e[31mWARNING\e[0m state, will send mail ..."
                    subject="Disk space full on server $myhostname pool $pool"
                    body="Used capacity $capusedperc percent is greater than $maxusedperc percent threshold or free capacity $capfreegig G is lower than $minfreegig G threshold"
                    send_mail
                    # failed mails do not count against the daily limit
                    if [ $? -ne 0 ]; then echo 0 > $spacewarncountfilenew; fi
                else
                    echo -e "Disk space on ZFS pool $pool is in \e[31mWARNING\e[0m state, cannot send mail because maxwarncountperday threshold reached"
                fi

            else
                echo -e "Free disk space \e[32mOK\e[0m on on ZFS pool ${pool} - used cap ${capusedperc}% free space ${capfreegig}G"
            fi

        else
            echo "Cannot run disk space check, because minfreegig parameter is missing"
        fi

        ###########################################
        #the mail part for health

        if [ ! -f "$poolmailfilehealth" ]; then
            echo "0" > $poolmailfilehealth
        fi

        #get pool health state from last run, if not 0 then mail was sent already
        read -r poolmailhealth <<< $(cat $poolmailfilehealth)

        if [ "$health" != "ONLINE" ] && [ "$poolmailhealth" == "0" ]; then # if pool is not online and mail not sent yet
            echo "ZFS pool $pool is not healthy, will send mail ..."
            subject="ZFS pool $pool on server $myhostname is not healthy."
            # NOTE(review): $poolchksumerrors is never assigned, see above
            body="Warning: ZFS pool $pool is in $health state. Checksum errors: ${poolchksumerrors}. There will be no more automatic scrubs and no more error mails until this is fixed."
            send_mail
            if [ $? -eq 0 ]; then echo "1" > $poolmailfilehealth; fi
        elif [ "$health" != "ONLINE" ] && [ "$poolmailhealth" != "0" ]; then # if pool is not online and mail was sent already
            echo "ZFS pool $pool is not healthy, mail was already sent"
        elif [ "$health" == "ONLINE" ] && [ "$poolmailhealth" != "0" ]; then #if poolmailhealth contains 1 then mail was already sent
            subject="ZFS pool $pool on server $myhostname health state is ok"
            body="Notice: ZFS pool $pool returned to $health state"
            send_mail
            if [ $? -eq 0 ]; then echo "0" > $poolmailfilehealth; fi #reset counter
        fi

done
  368.  
  369.  
#####################################################################################################
## checking S.M.A.R.T. health and attributes

#files for storing errors: split $smarterrorfile into directory, base name
#and extension so a per-disk file name can be assembled in the loop below
smartpath="${smarterrorfile%/*}"
smartfilename=$(basename "$smarterrorfile")
smartfilename="${smartfilename%.*}"
smartext="${smarterrorfile##*.}"

#get list of disks (whole-disk device names, partitions excluded)
if [ "$platform" == "Linux" ]; then
    # NOTE(review): [^1-99] is a negated class meaning "any char except 1-9";
    # it drops partition rows like sda1 from lsblk, but also accepts '0' or
    # whitespace after the name and misses NVMe / vd* devices — confirm it
    # matches the disks present on the target host
    harddisks=$(lsblk -l | grep 'sd[a-z][^1-99]' | awk '{ print $1 }')
elif [ "$platform" == "FreeBSD" ]; then
    # scan boot dmesg for da*/ada* device lines; [0-99] is equivalent to [0-9]
    harddisks=$(egrep 'da[0-99]' /var/run/dmesg.boot | sed 's/://' | awk '{ print $1 }' | uniq)
fi
  385.  
  386. ##############################
  387. # iterate through hard disks
  388.  
  389. for disk in $harddisks; do
  390.     smarterrordiskfile="${smartpath}/${smartfilename}-${disk}.${smartext}"
  391.     if [ -f $smarterrordiskfile ]; then
  392.         read -r smarterrorsold <<< $(cat $smarterrordiskfile)
  393.     fi
  394.     cat /dev/null > $smarterrordiskfile
  395.     if [ ! -z "${!disk}" ]; then
  396.         smartctl -A -H /dev/${disk} -d ${!disk} | awk '{ print $1,$2,$4,$6,$10 }' > $smartcache
  397.     else
  398.         smartctl -A -H /dev/${disk} | awk '{ print $1,$2,$4,$6,$10 }' > $smartcache
  399.     fi
  400.     cat $smartcache | while read output
  401.     do
  402.     diskhealth=$(echo $output | grep "overall-health" | awk '{ print $4 }')
  403.     id5=$(echo $output | grep "Reallocated_Sector" | awk '{ print $5}')
  404.     id10=$(echo $output | grep "Spin_Retry" | awk '{ print $5}')
  405.     id196=$(echo $output | grep "Reallocated_Event" | awk '{ print $5}')
  406.     id197=$(echo $output | grep "Current_Pending" | awk '{ print $5}')
  407.     id198=$(echo $output | grep "Offline_Uncorrectable" | awk '{ print $5}')
  408.     id199=$(echo $output | grep "UDMA_CRC" | awk '{ print $5}')
  409.     id233=$(echo $output | grep "Media_Wearout" | awk '{ print $3}')
  410.     if [ ! -z "$diskhealth" ] && [ $diskhealth != "PASSED" ]; then
  411.         echo "-------------------------------------------"
  412.         echo -e "S.M.A.R.T health state is \e[31mFAILED\e[0m on disk $disk" > $smarterrordiskfile
  413.     elif [ ! -z "$diskhealth" ] && [ $diskhealth == "PASSED" ]; then
  414.         echo "-------------------------------------------"
  415.         echo -e "S.M.A.R.T. health state is \e[32mOK\e[0m on disk $disk"
  416.     elif [ ! -z "$diskhealth" ]; then
  417.         echo "-------------------------------------------"
  418.         echo -e "S.M.A.R.T. health state is \e[31mUNKNOWN\e[0m on disk $disk"
  419.     fi
  420.  
  421.     if [ ! -z "$id5" ] && [ "$id5" -gt "0" ]; then
  422.         echo "${disk}: $id5 reallocated sectors" | tee -a $smarterrordiskfile
  423.     elif [ ! -z "$id5" ]; then
  424.         echo "${disk}: 0 reallocated sectors"
  425.     fi
  426.     if [ ! -z "$id10" ] && [ "$id10" -gt "0" ]; then
  427.         echo "${disk}: $id10 spin retry count" | tee -a $smarterrordiskfile
  428.     elif [ ! -z "$id10" ]; then
  429.         echo "${disk}: 0 spin retry count"
  430.     fi
  431.     if [ ! -z "$id196" ] && [ "$id196" -gt "0" ]; then
  432.         echo "${disk}: $id196 reallocation events" | tee -a $smarterrordiskfile
  433.     elif [ ! -z "$id196" ]; then
  434.         echo "${disk}: 0 reallocation events"
  435.     fi
  436.     if [ ! -z "$id197" ] && [ "$id197" -gt "0" ]; then
  437.         echo "${disk}: $id197 pending sectors" | tee -a $smarterrordiskfile
  438.     elif [ ! -z "$id197" ]; then
  439.         echo "${disk}: 0 pending sectors"
  440.     fi
  441.     if [ ! -z "$id198" ] && [ "$id198" -gt "0" ]; then
  442.         echo "${disk}: $id198 offline uncorrectable" | tee -a $smarterrordiskfile
  443.     elif [ ! -z "$id198" ]; then
  444.         echo "${disk}: 0 offline uncorrectable"
  445.     fi
  446.     if [ ! -z "$id199" ] && [ "$id199" -gt "0" ]; then
  447.         echo "${disk}: $id199 UDMA CRC error" | tee -a $smarterrordiskfile
  448.     elif [ ! -z "$id199" ]; then
  449.         echo "${disk}: 0 UDMA CRC error"
  450.     fi
  451.     if [ ! -z "$id202" ] && [ "$id202" -le "5" ]; then
  452.         echo "${disk}: $id202 percent lifetime problem" | tee -a $smarterrordiskfile
  453.     elif [ ! -z "$id202" ]; then
  454.         echo "${disk}: percent lifetime is ${id233}"
  455.     fi
  456.     if [ ! -z "$id233" ] && [ "$id233" -le "5" ]; then
  457.         echo "${disk}: $id233 media wearout problem" | tee -a $smarterrordiskfile
  458.     elif [ ! -z "$id233" ]; then
  459.         echo "${disk}: media wearout is ${id233}"
  460.     fi
  461.  
  462.     done
  463.  
  464.     read -r smarterrors <<< $(cat $smarterrordiskfile) #get new values from file
  465.  
  466.     if [ "$smarterrors" ] && [ "$smarterrors" != "$smarterrorsold" ]; then
  467.         echo -e "S.M.A.R.T. attributes \e[31mFAILURE\e[0m on disk $disk"
  468.         subject="Disk $disk on server $myhostname has S.M.A.R.T. attribute errors"
  469.         body="Disk $disk has attribute errors ${smarterrors}. There will be no more mail until status will change."
  470.         send_mail
  471.         if [ $? -ne 0 ]; then rm $smarterrordiskfile; fi
  472.     elif [ ! "$smarterrors" ] && [ "$smarterrorsold" ]; then
  473.         echo -e "S.M.A.R.T. attributes \e[32mRECOVERED\e[0m on disk $disk"
  474.         subject="Disk $disk on server $myhostname S.M.A.R.T. recovered from errors"
  475.         body="Disk $disk has no more attribute errors."
  476.         send_mail
  477.         if [ $? -ne 0 ]; then rm $smarterrordiskfile; fi
  478.     elif [ ! "$smarterrors" ]; then
  479.         echo -e "S.M.A.R.T. attributes \e[32mOK\e[0m on disk $disk"
  480.     elif [ "$smarterrors" ]; then
  481.         echo -e "S.M.A.R.T. attributes \e[31mFAILURE\e[0m on disk ${disk}, mail was already sent"
  482.     fi
  483.  
  484. done
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement