Advertisement
Guest User

Untitled

a guest
Mar 28th, 2020
250
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Bash 5.04 KB | None | 0 0
  1. #! /bin/sh
  2. #
  3. # Calomel.org
  4. #     https://calomel.org/zfs_health_check_script.html
  5. #     FreeBSD ZFS Health Check script
  6. #     zfs_health.sh @ Version 0.18
  7.  
  8. # Check health of ZFS volumes and drives. On any faults send email.
  9.  
  10.  
  # 99 problems but ZFS aint one
  # "problems" is the shared alert flag: 0 = nothing found so far, 1 = an alert
  # must be sent. Later checks only run while it is still 0.
  problems=0


  # Health - Check if all zfs volumes are in good condition. We are looking for
  # any keyword signifying a degraded or broken array.

  # Filter "zpool status" text arriving on stdin down to the lines that contain
  # one of the fault keywords (matched case-insensitively). Prints nothing when
  # no keyword is present.
  zfs_fault_lines() {
    grep -E -i '(DEGRADED|FAULTED|OFFLINE|UNAVAIL|REMOVED|FAIL|DESTROYED|corrupt|cannot|unrecover)'
  }

  # stderr is merged into the scanned text so that diagnostics printed by zpool
  # itself (the "cannot ..." keyword above) are seen by the filter as well.
  condition=$(/sbin/zpool status 2>&1 | zfs_fault_lines)
  if [ -n "${condition}" ]; then
    emailSubject="$(hostname) - ZFS pool - HEALTH fault"
    problems=1
  fi
  23.  
  24.  
  # Capacity - Make sure the pool capacity is below 80% for best performance. The
  # percentage really depends on how large your volume is. If you have a 128GB
  # SSD then 80% is reasonable. If you have a 60TB raid-z2 array then you can
  # probably set the warning closer to 95%.
  #
  # ZFS uses a copy-on-write scheme. The file system writes new data to
  # sequential free blocks first and when the uberblock has been updated the new
  # inode pointers become valid. This method is true only when the pool has
  # enough free sequential blocks. If the pool is at capacity and space limited,
  # ZFS will have to randomly write blocks. This means ZFS can not create an
  # optimal set of sequential writes and write performance is severely impacted.

  maxCapacity=80

  # Succeed (return 0) when at least one entry of the whitespace-separated list
  # in $1 is an integer greater than or equal to the limit in $2; return 1
  # otherwise. Entries that are not plain non-negative integers (for example a
  # "-" placeholder) are skipped instead of breaking the numeric test.
  zfs_capacity_exceeded() {
    # $1 is left unquoted on purpose: the list is split on whitespace.
    for zfs_pct in $1; do
      case "${zfs_pct}" in
        '' | *[!0-9]*) continue ;;
      esac
      if [ "${zfs_pct}" -ge "$2" ]; then
        return 0
      fi
    done
    return 1
  }

  # Only look at capacity while no earlier check has raised an alert.
  if [ "${problems:-0}" -eq 0 ]; then
    # One percentage per pool, with the trailing "%" cut off.
    capacity=$(/sbin/zpool list -H -o capacity | cut -d'%' -f1)
    if zfs_capacity_exceeded "${capacity}" "${maxCapacity}"; then
      emailSubject="$(hostname) - ZFS pool - Capacity Exceeded"
      problems=1
    fi
  fi
  49.  
  50.  
  # Errors - Check the columns for READ, WRITE and CKSUM (checksum) drive errors
  # on all volumes and all drives using "zpool status". If any non-zero errors
  # are reported an email will be sent out. You should then look to replace the
  # faulty drive and run "zpool scrub" on the affected volume after resilvering.

  # Read "zpool status" text on stdin and print "<name> <read> <write> <cksum>"
  # for every row whose second column is ONLINE and whose READ, WRITE or CKSUM
  # column is anything other than "0". Each column is tested on its own:
  # gluing the three counters together and searching for "000" would hide
  # counts such as 10/0/0, because "1000" also contains "000".
  zfs_error_rows() {
    awk '$2 == "ONLINE" && NF >= 5 && ($3 != "0" || $4 != "0" || $5 != "0") { print $1, $3, $4, $5 }'
  }

  # Only look at error counters while no earlier check has raised an alert.
  if [ "${problems:-0}" -eq 0 ]; then
    errors=$(/sbin/zpool status | zfs_error_rows)
    if [ -n "${errors}" ]; then
      emailSubject="$(hostname) - ZFS pool - Drive Errors"
      problems=1
    fi
  fi
  63.  
  64.  
  # Scrub Expired - Check if all volumes have been scrubbed in at least the last
  # 8 days. The general guide is to scrub volumes on desktop quality drives once
  # a week and volumes on enterprise class drives once a month. You can always
  # use cron to schedule "zpool scrub" in off hours. We scrub our volumes every
  # Sunday morning for example.
  #
  # Scrubbing traverses all the data in the pool once and verifies all blocks can
  # be read. Scrubbing proceeds as fast as the devices allows, though the
  # priority of any I/O remains below that of normal calls. This operation might
  # negatively impact performance, but the file system will remain usable and
  # responsive while scrubbing occurs. To initiate an explicit scrub, use the
  # "zpool scrub" command.
  #
  # The scrubExpire variable is in seconds. So for 8 days we calculate 8 days
  # times 24 hours times 3600 seconds to equal 691200 seconds.

  scrubExpire=691200

  # Read "zpool status" text on stdin and print the date that ends the
  # "scan: scrub ..." line as "Mon D YYYY" (e.g. "Mar 22 2020"). The fields are
  # counted from the end of the line, so the result does not depend on how many
  # words precede the date; this matters because the wording of that line
  # differs between zpool releases. Prints nothing if there is no such line.
  zfs_last_scrub_date() {
    awk '$1 == "scan:" && $2 == "scrub" && NF >= 5 { print $(NF-3), $(NF-2), $NF; exit }'
  }

  # Convert a "Mon D YYYY" date ($1) to seconds since the epoch at local
  # midnight. GNU date is recognised by its --version flag and parses the text
  # with -d; otherwise the BSD "-j -f" form is used, with an explicit 000000
  # time so the result is pinned to midnight. Prints nothing when the
  # conversion fails.
  zfs_date_to_epoch() {
    if date --version >/dev/null 2>&1; then
      date -d "$1" +%s 2>/dev/null
    else
      date -j -f '%b %e %Y %H%M%S' "$1 000000" +%s 2>/dev/null
    fi
  }

  # Only look at scrub age while no earlier check has raised an alert.
  if [ "${problems:-0}" -eq 0 ]; then
    currentDate=$(date +%s)
    zfsVolumes=$(/sbin/zpool list -H -o name)

    # The list is split on whitespace on purpose.
    # NOTE(review): assumes pool names contain no whitespace - confirm.
    for volume in ${zfsVolumes}; do
      volumeStatus=$(/sbin/zpool status "${volume}")

      # A pool that cannot be judged is skipped with "continue" (not "break"),
      # so the remaining pools are still examined.
      case "${volumeStatus}" in
        *"none requested"*)
          printf 'ERROR: You need to run "zpool scrub %s" before this script can monitor the scrub expiration time.\n' "${volume}" >&2
          continue
          ;;
        *"scrub in progress"* | *resilver*)
          continue
          ;;
      esac

      scrubRawDate=$(printf '%s\n' "${volumeStatus}" | zfs_last_scrub_date)
      scrubDate=$(zfs_date_to_epoch "${scrubRawDate}")

      # An empty or non-numeric value would be a syntax error in the
      # arithmetic expansion below, so it is reported and skipped instead.
      case "${scrubDate}" in
        '' | *[!0-9]*)
          printf 'WARNING: could not determine the last scrub date of "%s".\n' "${volume}" >&2
          continue
          ;;
      esac

      if [ $((${currentDate} - ${scrubDate})) -ge "${scrubExpire}" ]; then
        emailSubject="$(hostname) - ZFS pool - Scrub Time Expired. Scrub Needed on Volume(s)"
        problems=1
      fi
    done
  fi
  115.  
  116.  
  # Email - On any problems send email with drive status information and
  # capacities including a helpful subject line. Also use logger to write the
  # email subject to the local logs. This is also the place you may want to put
  # any other notifications like playing a sound file, beeping the internal
  # speaker, paging someone or updating Nagios or even BigBrother.

  # Recipient of the alert; root@localhost unless emailRecipient is already set.
  emailRecipient="${emailRecipient:-root@localhost}"

  if [ "${problems:-0}" -ne 0 ]; then
    # Mail body: the subject line, a blank line, the "zpool list" output, a
    # blank line, then the "zpool status" output. stderr of both commands is
    # included so a failing zpool call is visible in the report as well.
    printf '%s\n' "${emailSubject}" "" "$(/sbin/zpool list 2>&1)" "" "$(/sbin/zpool status 2>&1)" |
      /usr/bin/mail -s "${emailSubject}" "${emailRecipient}"
    # Quoted so the subject reaches logger as one argument, with no word
    # splitting or pathname expansion applied to it.
    logger "${emailSubject}"
  fi
  127.  
  128. ### EOF ###
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement