Advertisement
Guest User

Untitled

a guest
May 28th, 2015
240
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 14.51 KB | None | 0 0
  1. #!/bin/bash
  2. #######################################################################
  3. # This is a helper script that keeps snapraid parity info in sync with
  4. # your data and optionally verifies the parity info. Here's how it works:
  5. # 1) It first calls diff to figure out if the parity info is out of sync.
  6. # 2) If parity info is out of sync, AND the number of deleted files exceeds
  7. # X (configurable), it triggers an alert email and stops. (In case of
  8. # accidental deletions, you have the opportunity to recover them from
  9. # the existing parity info)
  10. # 3) If parity info is out of sync, AND the number of deleted files exceeds X
  11. # AND it has reached/exceeded Y (configurable) number of warnings, force
  12. # a sync. (Useful when you get a false alarm above and you can't be bothered
  13. # to log in and do a manual sync. Note the risk is if it's not a false alarm
  14. # and you can't access the box before Y number of times the job is run to
  15. # fix the issue... Well I hope you have other backups...)
  16. # 4) If parity info is out of sync BUT the number of deleted files did NOT
  17. # exceed X, it calls sync to update the parity info.
  18. # 5) If the parity info is in sync (either because nothing changed or after it
  19. # has successfully completed the sync job), it runs the scrub command to
  20. # validate the integrity of the data (both the files and the parity info).
  21. # Note that each run of the scrub command will validate only a (configurable)
  22. # portion of parity info to avoid having a long running job and affecting
  23. # the performance of the box.
  24. # 6) Once all jobs are completed, it sends an email with the output to user
  25. # (if configured).
  26. #
  27. # Author: SidneyC <sidneyc_at_outlook_dot_com>
  28. #
  29. # CHANGELOG
  30. # ---------
  31. # 23/10/2011 Initial release
  32. # 04/01/2015 Updated script to handle changes in SnapRAID v7.0
  33. # Added scrub job as an optional task (after diff and sync)
  34. # 06/01/2015 Made the script more robust by adding checks to make sure preceding
  35. # jobs completed as expected before continuing with the subsequent jobs.
  36. # Made emailing output to user optional.
  37. # 24/01/2015 Inserted a sed step to clean up crlf (aka dos/unix formatting issue)
  38. # in sync & scrub outputs.
  39. # Detect sync and scrub job failures and highlight to user via warning
  40. # subject line in email to user.
  41. # 25/01/2015 Added option to reduce progress report output in email (default is 2 -
  42. # report only in 10% intervals).
  43. # 26/01/2015 For terse = 2 setting, removed lines for 1-8% from output
  44. # 05/02/2015 Added logic to perform forced sync after X number of warnings
  45. # Cleaned up formatting in script file (changed tabs to spaces)
  46. # Made consistent the use of [ in the test statements
  47. # 08/02/2015 Added warning number to the email subject line so that it is easier to
  48. # tell how many warnings have been issued so far
  49. # 04/03/2015 Corrected Scrub job status check (i.e. added check for text "Nothing
  50. # to do") to avoid sending false warning email
  51. #
  52. #######################################################################
  53.  
  54. ## USER DEFINED SETTINGS ##
  55. # address where the output of the jobs will be emailed to.
  56. # comment it out to disable email output
  57. EMAIL_ADDRESS="root"
  58.  
  59. # Set the threshold of deleted files to stop the sync job from running.
  60. # NOTE that depending on how active your filesystem is being used, a low
  61. # number here may result in your parity info being out of sync often and/or
  62. # you having to do lots of manual sync.
  63. DEL_THRESHOLD=20
  64.  
  65. # Set number of warnings before we force a sync job.
  66. # This option comes in handy when you cannot be bothered to manually
  67. # start a sync job when DEL_THRESHOLD is breached due to false alarm.
  68. # Set to 0 to ALWAYS force a sync (i.e. ignore the delete threshold above)
  69. # Set to -1 to NEVER force a sync (i.e. need to manual sync if delete threshold is breached)
  70. SYNC_WARN_THRESHOLD=3
  71.  
  72. # Set percentage of array to scrub if it is in sync.
  73. # i.e. 0 to disable and 100 to scrub the full array in one go
  74. # WARNING - depending on size of your array, setting to 100 will take a very long time!
  75. SCRUB_PERCENT=5
  76. SCRUB_AGE=10
  77.  
  78. # this script will log its actions to a file at this location
  79. LOG_FILE="/tmp/snapRAID.log"
  80. # location of the snapraid binary
  81. SNAPRAID_BIN="/usr/bin/snapraid"
  82. # location of the mail program binary
  83. MAIL_BIN="/usr/bin/mail"
  84.  
  85. # how much progress output do we want to keep in email
  86. # Default is 2 which means report progress in 10% intervals
  87. # Set to 1 to report progress in 1% intervals
  88. # Set to 0 to report everything
  89. TERSE=2
  90.  
  91. ## INTERNAL TEMP VARS ##
  92. EMAIL_SUBJECT_PREFIX="[`hostname`] SnapRAID - "
  93. TMP_OUTPUT="/tmp/snapRAID.out"
  94. SYNC_WARN_FILE="/tmp/snapRAID.warnCount"
  95. SYNC_WARN_COUNT=""
  96.  
  97. # auto determine names of content and parity files
  98. CONTENT_FILE=`cat /etc/snapraid.conf | grep snapraid.content | head -n 1 | cut -d " " -f2`
  99. PARITY_FILE=`cat /etc/snapraid.conf | grep snapraid.parity | head -n 1 | cut -d " " -f2`
  100.  
  101. # load configuration file if it exits
  102. if [ -f /etc/snapraid_check.conf ]; then
  103. source /etc/snapraid_check.conf
  104. fi
  105.  
  106. # redirect all stdout to log file (leave stderr alone thou)
  107. exec >> $LOG_FILE
  108.  
  109. # timestamp the job
  110. echo "[`date`] SnapRAID Job started."
  111. echo "SnapRAID DIFF Job started on `date`" > $TMP_OUTPUT
  112. echo "----------------------------------------" >> $TMP_OUTPUT
  113.  
  114. #TODO - mount and unmount parity disk on demand!
  115.  
  116. #sanity check first to make sure we can access the content and parity files
  117. if [ ! -e $CONTENT_FILE ]; then
  118. echo "[`date`] ERROR - Content file ($CONTENT_FILE) not found!"
  119. echo "ERROR - Content file ($CONTENT_FILE) not found!" >> $TMP_OUTPUT
  120. exit 1;
  121. fi
  122.  
  123. if [ ! -e $PARITY_FILE ]; then
  124. echo "[`date`] ERROR - Parity file ($PARITY_FILE) not found!"
  125. echo "ERROR - Parity file ($PARITY_FILE) not found!" >> $TMP_OUTPUT
  126. exit 1;
  127. fi
  128.  
  129. # run the snapraid DIFF command
  130. echo "[`date`] Running DIFF Command."
  131. $SNAPRAID_BIN diff >> $TMP_OUTPUT
  132. # wait for the above cmd to finish
  133. wait
  134.  
  135. echo "----------------------------------------" >> $TMP_OUTPUT
  136. echo "SnapRAID DIFF Job finished on `date`" >> $TMP_OUTPUT
  137. JOBS_DONE="DIFF"
  138.  
  139. DEL_COUNT=$(grep -w '^ \{1,\}[0-9]* removed$' $TMP_OUTPUT | sed 's/^ *//g' | cut -d ' ' -f1)
  140. ADD_COUNT=$(grep -w '^ \{1,\}[0-9]* added$' $TMP_OUTPUT | sed 's/^ *//g' | cut -d ' ' -f1)
  141. MOVE_COUNT=$(grep -w '^ \{1,\}[0-9]* moved$' $TMP_OUTPUT | sed 's/^ *//g' | cut -d ' ' -f1)
  142. COPY_COUNT=$(grep -w '^ \{1,\}[0-9]* copied$' $TMP_OUTPUT | sed 's/^ *//g' | cut -d ' ' -f1)
  143. UPDATE_COUNT=$(grep -w '^ \{1,\}[0-9]* updated$' $TMP_OUTPUT | sed 's/^ *//g' | cut -d ' ' -f1)
  144.  
  145. # sanity check to make sure that we were able to get our counts from the output of the DIFF job
  146. if [ -z "$DEL_COUNT" -o -z "$ADD_COUNT" -o -z "$MOVE_COUNT" -o -z "$COPY_COUNT" -o -z "$UPDATE_COUNT" ]; then
  147. # failed to get one or more of the count values, lets report to user and exit with error code
  148. echo "[`date`] ERROR - failed to get one or more count values. Unable to proceed. Exiting script."
  149. if [ $EMAIL_ADDRESS ]; then
  150. $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX WARNING - Unable to proceed with SYNC/SCRUB job(s). Check DIFF job output inside." "$EMAIL_ADDRESS" < $TMP_OUTPUT
  151. fi
  152. exit 1;
  153. fi
  154.  
  155. echo "SUMMARY of changes - Added [$ADD_COUNT] - Deleted [$DEL_COUNT] - Moved [$MOVE_COUNT] - Copied [$COPY_COUNT] - Updated [$UPDATE_COUNT]" >> $TMP_OUTPUT
  156.  
  157. # check if the conditions to run SYNC are met
  158. # CHK 1 - if files have changed
  159. if [ $DEL_COUNT -gt 0 -o $ADD_COUNT -gt 0 -o $MOVE_COUNT -gt 0 -o $COPY_COUNT -gt 0 -o $UPDATE_COUNT -gt 0 ]; then
  160. # CHK 1 - YES, files have changed
  161. # CHK 2 - if number of deleted files exceed DEL_THRESHOLD
  162. if [ $DEL_COUNT -lt $DEL_THRESHOLD ]; then
  163. # CHK 2 - NO, delete threshold not reached, lets run the sync job
  164. echo "Deleted files ($DEL_COUNT) did not exceed threshold ($DEL_THRESHOLD), proceeding with sync job." >> $TMP_OUTPUT
  165. echo "[`date`] Changes detected [A-$ADD_COUNT,D-$DEL_COUNT,M-$MOVE_COUNT,C-$COPY_COUNT,U-$UPDATE_COUNT] and deleted files ($DEL_COUNT) is below threshold ($DEL_THRESHOLD). Running SYNC Command."
  166. DO_SYNC=1
  167. else
  168. #CHK 2 - YES, delete threshold breached! print warning message to both outputs
  169. echo "Number of deleted files ($DEL_COUNT) exceeded threshold ($DEL_THRESHOLD)." >> $TMP_OUTPUT
  170. echo "[`date`] WARNING - Deleted files ($DEL_COUNT) exceeded threshold ($DEL_THRESHOLD). Check $TMP_OUTPUT for details."
  171. # CHK 3 - if forced sync is set
  172. if [ $SYNC_WARN_THRESHOLD -gt -1 ]; then
  173. # CHK 3 - YES
  174. echo "Forced sync is enabled." >> $TMP_OUTPUT
  175. echo "[`date`] Forced sync is enabled."
  176. # CHK 4 - if number of warnings has exceeded threshold
  177. SYNC_WARN_COUNT=$(sed 'q;/^[0-9][0-9]*$/!d' $SYNC_WARN_FILE 2>/dev/null)
  178. SYNC_WARN_COUNT=${SYNC_WARN_COUNT:-0} #value is zero if file does not exist or does not contain what we are expecting
  179. if [ $SYNC_WARN_COUNT -ge $SYNC_WARN_THRESHOLD ]; then
  180. # CHK 5 - YES, lets force a sync job. Do not need to remove warning marker here as it is automatically removed when the sync job is run by this script
  181. echo "Number of warning(s) ($SYNC_WARN_COUNT) has reached/exceeded threshold ($SYNC_WARN_THRESHOLD). Forcing a sync job to run." >> $TMP_OUTPUT
  182. echo "[`date`] Number of warning(s) ($SYNC_WARN_COUNT) has reached/exceeded threshold ($SYNC_WARN_THRESHOLD). Forcing a sync job to run."
  183. DO_SYNC=1
  184. else
  185. # CHK 4 - NO, so let's increment the warning count and skip the sync job
  186. ((SYNC_WARN_COUNT += 1))
  187. echo $SYNC_WARN_COUNT > $SYNC_WARN_FILE
  188. echo "$((SYNC_WARN_THRESHOLD - SYNC_WARN_COUNT)) warning(s) till forced sync. NOT proceeding with sync job." >> $TMP_OUTPUT
  189. echo "[`date`] $((SYNC_WARN_THRESHOLD - SYNC_WARN_COUNT)) warning(s) till forced sync. NOT proceeding with sync job."
  190. DO_SYNC=0
  191. fi
  192. else
  193. # CHK 3 - NO, so let's skip SYNC
  194. echo "Forced sync is not enabled. NOT proceeding with sync job. Please run sync manually if this is not an error condition." >> $TMP_OUTPUT
  195. echo "[`date`] Forced sync is not enabled. Check $TMP_OUTPUT for details. NOT proceeding with sync job."
  196. DO_SYNC=0
  197. fi
  198. fi
  199. else
  200. # CHK 1 - NO, so let's skip SYNC
  201. echo "[`date`] No change detected. Not running SYNC job."
  202. DO_SYNC=0
  203. fi
  204.  
  205. # Now run sync if conditions are met
  206. if [ $DO_SYNC -eq 1 ]; then
  207. echo "SnapRAID SYNC Job started on `date`" >> $TMP_OUTPUT
  208. echo "----------------------------------------" >> $TMP_OUTPUT
  209. $SNAPRAID_BIN sync | sed -e 's/\r/\n/g' >> $TMP_OUTPUT
  210. #wait for the job to finish
  211. wait
  212. echo "----------------------------------------" >> $TMP_OUTPUT
  213. echo "SnapRAID SYNC Job finished on `date`" >> $TMP_OUTPUT
  214. JOBS_DONE="$JOBS_DONE + SYNC"
  215. # insert SYNC marker to 'Everything OK' string to differentiate it from SCRUB job later
  216. sed -i 's/^Everything OK/SYNC-Everything OK/g' $TMP_OUTPUT
  217. # Remove any warning flags if set previously. This is done in this step to take care of scenarios when user has manually synced or restored deleted files and we will have missed it in the checks above.
  218. if [ -e $SYNC_WARN_FILE ]; then
  219. rm $SYNC_WARN_FILE
  220. fi
  221. fi
  222.  
  223. # Moving onto scrub now. Check if user has enabled scrub
  224. if [ $SCRUB_PERCENT -gt 0 ]; then
  225. # YES, first let's check if delete threshold has been breached and we have not forced a sync.
  226. if [ $DEL_COUNT -gt $DEL_THRESHOLD -a $DO_SYNC -eq 0 ]; then
  227. # YES, parity is out of sync so let's not run scrub job
  228. echo "[`date`] Scrub job cancelled as parity info is out of sync (deleted files threshold has been breached)."
  229. else
  230. # NO, delete threshold has not been breached OR we forced a sync, but we have one last test -
  231. # let's make sure if sync ran, it completed successfully (by checking if snapRAID wrote "Everything OK" to the output).
  232. if [ $DO_SYNC -eq 1 -a -z "$(grep -w "SYNC-Everything OK" $TMP_OUTPUT)" ]; then
  233. # Sync ran but did not complete successfully so lets not run scrub to be safe
  234. echo "[`date`] WARNING - check output of SYNC job. Could not detect string <Everything OK>. Not proceeding with SCRUB job."
  235. echo "WARNING - check output of SYNC job. Could not detect string <Everything OK>. Not proceeding with SCRUB job." >> $TMP_OUTPUT
  236. else
  237. # Everything ok - let's run the scrub job!
  238. echo "[`date`] Running SCRUB Command."
  239. echo "SnapRAID SCRUB Job started on `date`" >> $TMP_OUTPUT
  240. echo "----------------------------------------" >> $TMP_OUTPUT
  241. $SNAPRAID_BIN scrub -p $SCRUB_PERCENT -o $SCRUB_AGE | sed -e 's/\r/\n/g' >> $TMP_OUTPUT
  242. #wait for the job to finish
  243. wait
  244. echo "----------------------------------------" >> $TMP_OUTPUT
  245. echo "SnapRAID SCRUB Job finished on `date`" >> $TMP_OUTPUT
  246. JOBS_DONE="$JOBS_DONE + SCRUB"
  247. # insert SCRUB marker to 'Everything OK' string to differentiate it from SYNC job above
  248. sed -i 's/^Everything OK/SCRUB-Everything OK/g' $TMP_OUTPUT
  249. fi
  250. fi
  251. else
  252. echo "[`date`] Scrub job is not scheduled. Not running SCRUB job."
  253. fi
  254.  
  255. # all jobs done, let's send output to user if configured
  256. if [ $EMAIL_ADDRESS ]; then
  257. echo "[`date`] Email address is set. Sending email report to <$EMAIL_ADDRESS>"
  258. # check if deleted count exceeded threshold
  259. if [ $DEL_COUNT -gt $DEL_THRESHOLD -a $DO_SYNC -eq 0 ]; then
  260. # YES, lets inform user with an appropriate subject line
  261. $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX WARNING $SYNC_WARN_COUNT - Number of deleted files ($DEL_COUNT) exceeded threshold ($DEL_THRESHOLD)" "$EMAIL_ADDRESS" < $TMP_OUTPUT
  262. elif [ -z "${JOBS_DONE##*"SYNC"*}" -a -z "$(grep -w "SYNC-Everything OK" $TMP_OUTPUT)" ]; then
  263. # Sync ran but did not complete successfully so lets warn the user
  264. $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX WARNING - SYNC job ran but did not complete successfully" "$EMAIL_ADDRESS" < $TMP_OUTPUT
  265. elif [ -z "${JOBS_DONE##*"SCRUB"*}" -a -z "$(grep -w "SCRUB-Everything OK" $TMP_OUTPUT)" -a -z "$(grep -w "Nothing to do" $TMP_OUTPUT)" ]; then
  266. # Scrub ran but did not complete successfully so lets warn the user
  267. $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX WARNING - SCRUB job ran but did not complete successfully" "$EMAIL_ADDRESS" < $TMP_OUTPUT
  268. else
  269. # OPTIONALLY, let's reduce the amount of status lines in output.
  270. if [ $TERSE -gt 1 ]; then
  271. # Report progress in interval of tens %
  272. sed -i '$!N; /^\([0-9]\).*\n\1.*$/!P; D' $TMP_OUTPUT
  273. sed -i '/^[1-8]%.*$/d' $TMP_OUTPUT
  274. elif [ $TERSE -gt 0 ]; then
  275. # Report progress in interval of ones %
  276. sed -i '$!N; /^\([0-9]*\)%.*\n\1.*$/!P; D' $TMP_OUTPUT
  277. fi
  278. $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX INFO - $JOBS_DONE Jobs COMPLETED" "$EMAIL_ADDRESS" < $TMP_OUTPUT
  279. fi
  280. fi
  281.  
  282. echo "[`date`] All jobs ended."
  283.  
  284. exit 0;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement