Advertisement
Guest User

SnapRaid Script New

a guest
Jul 26th, 2019
621
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Bash 17.65 KB | None | 0 0
  1. #!/bin/bash
  2.  
  3. #######################################################################
  4. # This is a helper script that keeps snapraid parity info in sync with
  5. # your data and optionally verifies the parity info. Here's how it works:
  6. #   1) Shuts down configured services
  7. #   2) Calls diff to figure out if the parity info is out of sync.
  8. #   3) If parity info is out of sync, AND the number of deleted or changed files exceed
  9. #      X (each configurable), it triggers an alert email and stops. (In case of
  10. #      accidental deletions, you have the opportunity to recover them from
  11. #      the existing parity info. This also mitigates to a degree encryption malware.)
  12. #   4) If parity info is out of sync, AND the number of deleted or changed files exceed X
  13. #      AND it has reached/exceeded Y (configurable) number of warnings, force
  14. #      a sync. (Useful when you get a false alarm above and you can't be bothered
  15. #      to login and do a manual sync. Note the risk is if its not a false alarm
  16. #      and you can't access the box before Y number of times the job is run  to
  17. #      fix the issue... Well I hope you have other backups...)
  18. #   5) If parity info is out of sync BUT the number of deleted files did NOT
  19. #      exceed X, it calls sync to update the parity info.
  20. #   6) If the parity info is in sync (either because nothing changed or after it
  21. #      has successfully completed the sync job, it runs the scrub command to
  22. #      validate the integrity of the data (both the files and the parity info).
  23. #      Note that each run of the scrub command will validate only a (configurable)
  24. #      portion of parity info to avoid having a long running job and affecting
  25. #      the performance of the box.
  26. #   7) Once all jobs are completed, it sends an email with the output to user
  27. #      (if configured).
  28. #
  29. #   Inspired by Zack Reed (http://zackreed.me/articles/83-updated-snapraid-sync-script)
  30. #   Modified version of mtompkins version of my script (https://gist.github.com/mtompkins/91cf0b8be36064c237da3f39ff5cc49d)
  31. #
  32. #######################################################################
  33.  
  34. ######################
  35. #   USER VARIABLES   #
  36. ######################
  37.  
  38. ####################### USER CONFIGURATION START #######################
  39.  
  40. # address where the output of the jobs will be emailed to.
  41. EMAIL_ADDRESS="email@email.com"
  42.  
  43. # Set the thresholds of deleted (DEL_THRESHOLD) and updated (UP_THRESHOLD)
  44. # files at which the sync job is stopped. NOTE that depending on how actively
  45. # your filesystem is being used, low numbers here may result in your parity
  46. # info being out of sync often and/or you having to do lots of manual syncing.
  47. DEL_THRESHOLD=50
  48. UP_THRESHOLD=500
  49.  
  50. # Set number of warnings before we force a sync job.
  51. # This option comes in handy when you cannot be bothered to manually
  52. # start a sync job when DEL_THRESHOLD is breached due to false alarm.
  53. # Set to 0 to ALWAYS force a sync (i.e. ignore the delete threshold above)
  54. # Set to -1 to NEVER force a sync (i.e. need to manual sync if delete threshold is breached)
  55. #SYNC_WARN_THRESHOLD=3
  56. SYNC_WARN_THRESHOLD=-1
  57.  
  58. # Set percentage of array to scrub if it is in sync (0 disables scrubbing,
  59. # 100 scrubs the full array in one go). SCRUB_AGE is passed to `snapraid scrub -o`.
  60. # WARNING - depending on size of your array, setting to 100 will take a very long time!
  61. SCRUB_PERCENT=25
  62. SCRUB_AGE=10
  63.  
  64. # Set the option to log SMART info. 1 to enable, any other values to disable
  65. SMART_LOG=1
  66.  
  67. # location of the snapraid binary
  68. SNAPRAID_BIN="/usr/local/bin/snapraid"
  69. # location of the mail program binary
  70. MAIL_BIN="/usr/bin/mail"
  71.  
  72. function main(){
  73.  
  74.   ######################
  75.   #   INIT VARIABLES   #
  76.   ######################
  77.   CHK_FAIL=0
  78.   DO_SYNC=0
  79.   EMAIL_SUBJECT_PREFIX="(SnapRAID on `hostname`)"
  80.   GRACEFUL=0
  81.   SYNC_WARN_FILE="/tmp/snapRAID.warnCount"
  82.   SYNC_WARN_COUNT=""
  83.   TMP_OUTPUT="/tmp/snapRAID.out"
  84.   # Capture time
  85.   SECONDS=0
  86.  
  87.   ###############################
  88.   #   MANAGE DOCKER CONTAINERS  #
  89.   ###############################
  90.   # Set to 0 to not manage any containers.
  91.   MANAGE_SERVICES=1
  92.  
  93.   # Containers to manage (separated with spaces).
  94.   SERVICES='nzbget sonarr radarr lidarr jackett bazarr plex transmission prometheus grafana nextcloud nextcloud-mariadb watchtower organizr portainer'
  95.  
  96.   # Build Services Array...
  97.   service_array_setup
  98.  
  99.   # Expand PATH for smartctl
  100.   PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
  101.  
  102.   # Determine names of first content file...
  103.   CONTENT_FILE=`grep -v '^$\|^\s*\#' /etc/snapraid.conf | grep snapraid.content | head -n 1 | cut -d " " -f2`
  104.  
  105.   # Build an array of parity all files...
  106.   #PARITY_FILES[0]=`cat /etc/snapraid.conf | grep "^[^#;]" | grep parity | head -n 1 | cut -d " " -f 2 | cut -d "," -f 1`
  107.   #PARITY_FILES[1]=`cat /etc/snapraid.conf | grep "^[^#;]" | grep parity | head -n 1 | cut -d " " -f 2 | cut -d "," -f 2`
  108.   #PARITY_FILES[2]=`cat /etc/snapraid.conf | grep "^[^#;]" | grep 2-parity | head -n 1 | cut -d " " -f 2 | cut -d "," -f 1`
  109.   #PARITY_FILES[3]=`cat /etc/snapraid.conf | grep "^[^#;]" | grep 2-parity | head -n 1 | cut -d " " -f 2 | cut -d "," -f 2`
  110.   IFS=$'\n' PARITY_FILES=(`cat /etc/snapraid.conf | grep "^[^#;]" | grep "^\([2-6z]-\)*parity" | cut -d " " -f 2 | tr ',' '\n'`)
  111.  
  112. ##### USER CONFIGURATION STOP ##### MAKE NO CHANGES BELOW THIS LINE ####
  113.  
  114.   # create tmp file for output
  115.   > $TMP_OUTPUT
  116.  
  117.   # Redirect all output to file and screen. Starts a tee process
  118.   output_to_file_screen
  119.  
  120.   # timestamp the job
  121.   echo "SnapRAID Script Job started [`date`]"
  122.   echo
  123.   echo "----------------------------------------"
  124.  
  125.   # Remove any plex created anomolies
  126.   echo "##Preprocessing"
  127.  
  128.   # Stop any services that may inhibit optimum execution
  129.   if [ $MANAGE_SERVICES -eq 1 ]; then
  130.     echo "###Stop Services [`date`]"
  131.     stop_services
  132.   fi
  133.  
  134.   # sanity check first to make sure we can access the content and parity files
  135.   sanity_check
  136.  
  137.   echo
  138.   echo "----------------------------------------"
  139.   echo "##Processing"
  140.  
  141.   # Fix timestamps
  142.   chk_zero
  143.  
  144.   # run the snapraid DIFF command
  145.   echo "###SnapRAID DIFF [`date`]"
  146.   $SNAPRAID_BIN diff
  147.   # wait for the above cmd to finish, save output and open new redirect
  148.   close_output_and_wait
  149.   output_to_file_screen
  150.   echo
  151.   echo "DIFF finished [`date`]"
  152.   JOBS_DONE="DIFF"
  153.  
  154.   # Get number of deleted, updated, and modified files...
  155.   get_counts
  156.  
  157.   # sanity check to make sure that we were able to get our counts from the output of the DIFF job
  158.   if [ -z "$DEL_COUNT" -o -z "$ADD_COUNT" -o -z "$MOVE_COUNT" -o -z "$COPY_COUNT" -o -z "$UPDATE_COUNT" ]; then
  159.     # failed to get one or more of the count values, lets report to user and exit with error code
  160.     echo "**ERROR** - failed to get one or more count values. Unable to proceed."
  161.     echo "Exiting script. [`date`]"
  162.     if [ $EMAIL_ADDRESS ]; then
  163.       SUBJECT="$EMAIL_SUBJECT_PREFIX WARNING - Unable to proceed with SYNC/SCRUB job(s). Check DIFF job output."
  164.       send_mail
  165.     fi
  166.     exit 1;
  167.   fi
  168.   echo
  169.   echo "**SUMMARY of changes - Added [$ADD_COUNT] - Deleted [$DEL_COUNT] - Moved [$MOVE_COUNT] - Copied [$COPY_COUNT] - Updated [$UPDATE_COUNT]**"
  170.   echo
  171.  
  172.   # check if the conditions to run SYNC are met
  173.   # CHK 1 - if files have changed
  174.   if [ $DEL_COUNT -gt 0 -o $ADD_COUNT -gt 0 -o $MOVE_COUNT -gt 0 -o $COPY_COUNT -gt 0 -o $UPDATE_COUNT -gt 0 ]; then
  175.     chk_del
  176.  
  177.     if [ $CHK_FAIL -eq 0 ]; then
  178.       chk_updated
  179.     fi
  180.  
  181.     if [ $CHK_FAIL -eq 1 ]; then
  182.       chk_sync_warn
  183.     fi
  184.   else
  185.     # NO, so let's skip SYNC
  186.     echo "No change detected. Not running SYNC job. [`date`] "
  187.     DO_SYNC=0
  188.   fi
  189.  
  190.   # Now run sync if conditions are met
  191.   if [ $DO_SYNC -eq 1 ]; then
  192.     echo "###SnapRAID SYNC [`date`]"
  193.     $SNAPRAID_BIN sync -q
  194.     #wait for the job to finish
  195.     close_output_and_wait
  196.     output_to_file_screen
  197.     echo "SYNC finished [`date`]"
  198.     JOBS_DONE="$JOBS_DONE + SYNC"
  199.     # insert SYNC marker to 'Everything OK' or 'Nothing to do' string to differentiate it from SCRUB job later
  200.     sed_me "s/^Everything OK/SYNC_JOB--Everything OK/g;s/^Nothing to do/SYNC_JOB--Nothing to do/g" "$TMP_OUTPUT"
  201.     # Remove any warning flags if set previously. This is done in this step to take care of scenarios when user
  202.     # has manually synced or restored deleted files and we will have missed it in the checks above.
  203.     if [ -e $SYNC_WARN_FILE ]; then
  204.       rm $SYNC_WARN_FILE
  205.     fi
  206.     echo
  207.   fi
  208.  
  209.   # Moving onto scrub now. Check if user has enabled scrub
  210.   if [ $SCRUB_PERCENT -gt 0 ]; then
  211.     # YES, first let's check if delete threshold has been breached and we have not forced a sync.
  212.     if [ $CHK_FAIL -eq 1 -a $DO_SYNC -eq 0 ]; then
  213.       # YES, parity is out of sync so let's not run scrub job
  214.       echo "Scrub job cancelled as parity info is out of sync (deleted or changed files threshold has been breached). [`date`]"
  215.     else
  216.       # NO, delete threshold has not been breached OR we forced a sync, but we have one last test -
  217.       # let's make sure if sync ran, it completed successfully (by checking for our marker text "SYNC_JOB--" in the output).
  218.       if [ $DO_SYNC -eq 1 -a -z "$(grep -w "SYNC_JOB-" $TMP_OUTPUT)" ]; then
  219.         # Sync ran but did not complete successfully so lets not run scrub to be safe
  220.         echo "**WARNING** - check output of SYNC job. Could not detect marker . Not proceeding with SCRUB job. [`date`]"
  221.       else
  222.         # Everything ok - let's run the scrub job!
  223.         echo "###SnapRAID SCRUB [`date`]"
  224.         $SNAPRAID_BIN scrub -p $SCRUB_PERCENT -o $SCRUB_AGE -q
  225.         #wait for the job to finish
  226.         close_output_and_wait
  227.         output_to_file_screen
  228.         echo "SCRUB finished [`date`]"
  229.         echo
  230.         JOBS_DONE="$JOBS_DONE + SCRUB"
  231.         # insert SCRUB marker to 'Everything OK' or 'Nothing to do' string to differentiate it from SYNC job above
  232.         sed_me "s/^Everything OK/SCRUB_JOB--Everything OK/g;s/^Nothing to do/SCRUB_JOB--Nothing to do/g" "$TMP_OUTPUT"
  233.       fi
  234.     fi
  235.   else
  236.     echo "Scrub job is not enabled. Not running SCRUB job. [`date`] "
  237.   fi
  238.  
  239.   echo
  240.   echo "----------------------------------------"
  241.   echo "##Postprocessing"
  242.  
  243.   # Moving onto logging SMART info if enabled
  244.   if [ $SMART_LOG -eq 1 ]; then
  245.     echo
  246.     $SNAPRAID_BIN smart
  247.     close_output_and_wait
  248.     output_to_file_screen
  249.   fi
  250.  
  251.   echo "Spinning down disks..."
  252.   $SNAPRAID_BIN down
  253.  
  254.   # Graceful restore of services outside of trap - for messaging
  255.   GRACEFUL=1
  256.   if [ $MANAGE_SERVICES -eq 1 ]; then
  257.     restore_services
  258.   fi
  259.  
  260.   echo "All jobs ended. [`date`] "
  261.  
  262.   # all jobs done, let's send output to user if configured
  263.   if [ $EMAIL_ADDRESS ]; then
  264.     echo -e "Email address is set. Sending email report to **$EMAIL_ADDRESS** [`date`]"
  265.     # check if deleted count exceeded threshold
  266.     prepare_mail
  267.  
  268.     ELAPSED="$(($SECONDS / 3600))hrs $((($SECONDS / 60) % 60))min $(($SECONDS % 60))sec"
  269.     echo
  270.     echo "----------------------------------------"
  271.     echo "##Total time elapsed for SnapRAID: $ELAPSED"
  272.  
  273.     # Add a topline to email body
  274.     sed_me "1s/^/##$SUBJECT \n/" "${TMP_OUTPUT}"
  275.     send_mail
  276.   fi
  277.  
  278.   #clean_desc
  279.  
  280.   exit 0;
  281. }
  282.  
  283. #######################
  284. # FUNCTIONS & METHODS #
  285. #######################
  286.  
  287. function sanity_check() {
  288.   if [ ! -e $CONTENT_FILE ]; then
  289.     echo "**ERROR** Content file ($CONTENT_FILE) not found!"
  290.     exit 1;
  291.   fi
  292.  
  293.   echo "Testing that all parity files are present."
  294.   for i in "${PARITY_FILES[@]}"
  295.     do
  296.       if [ ! -e $i ]; then
  297.         echo "[`date`] ERROR - Parity file ($i) not found!"
  298.         echo "ERROR - Parity file ($i) not found!" >> $TMP_OUTPUT
  299.         exit 1;
  300.       fi
  301.   done
  302.   echo "All parity files found. Continuing..."
  303. }
  304.  
  305. function get_counts() {
  306.   DEL_COUNT=$(grep -w '^ \{1,\}[0-9]* removed' $TMP_OUTPUT | sed 's/^ *//g' | cut -d ' ' -f1)
  307.   ADD_COUNT=$(grep -w '^ \{1,\}[0-9]* added' $TMP_OUTPUT | sed 's/^ *//g' | cut -d ' ' -f1)
  308.   MOVE_COUNT=$(grep -w '^ \{1,\}[0-9]* moved' $TMP_OUTPUT | sed 's/^ *//g' | cut -d ' ' -f1)
  309.   COPY_COUNT=$(grep -w '^ \{1,\}[0-9]* copied' $TMP_OUTPUT | sed 's/^ *//g' | cut -d ' ' -f1)
  310.   UPDATE_COUNT=$(grep -w '^ \{1,\}[0-9]* updated' $TMP_OUTPUT | sed 's/^ *//g' | cut -d ' ' -f1)
  311. }
  312.  
  313. function sed_me(){
  314.   # Close the open output stream first, then perform sed and open a new tee process and redirect output.
  315.   # We close stream because of the calls to new wait function in between sed_me calls.
  316.   # If we do not do this we try to close Processes which are not parents of the shell.
  317.   exec >&$out 2>&$err
  318.   $(sed -i "$1" "$2")
  319.  
  320.   output_to_file_screen
  321. }
  322.  
  323. function chk_del(){
  324.   if [ $DEL_COUNT -lt $DEL_THRESHOLD ]; then
  325.     # NO, delete threshold not reached, lets run the sync job
  326.     echo "There are deleted files. The number of deleted files, ($DEL_COUNT), is below the threshold of ($DEL_THRESHOLD). SYNC Authorized."
  327.     DO_SYNC=1
  328.   else
  329.     echo "**WARNING** Deleted files ($DEL_COUNT) exceeded threshold ($DEL_THRESHOLD)."
  330.     CHK_FAIL=1
  331.   fi
  332. }
  333.  
  334. function chk_updated(){
  335.   if [ $UPDATE_COUNT -lt $UP_THRESHOLD ]; then
  336.     echo "There are updated files. The number of updated files, ($UPDATE_COUNT), is below the threshold of ($UP_THRESHOLD). SYNC Authorized."
  337.     DO_SYNC=1
  338.   else
  339.     echo "**WARNING** Updated files ($UPDATE_COUNT) exceeded threshold ($UP_THRESHOLD)."
  340.     CHK_FAIL=1
  341.   fi
  342. }
  343.  
  344. function chk_sync_warn(){
  345.   if [ $SYNC_WARN_THRESHOLD -gt -1 ]; then
  346.     echo "Forced sync is enabled. [`date`]"
  347.  
  348.     SYNC_WARN_COUNT=$(sed 'q;/^[0-9][0-9]*$/!d' $SYNC_WARN_FILE 2>/dev/null)
  349.     SYNC_WARN_COUNT=${SYNC_WARN_COUNT:-0} #value is zero if file does not exist or does not contain what we are expecting
  350.  
  351.     if [ $SYNC_WARN_COUNT -ge $SYNC_WARN_THRESHOLD ]; then
  352.       # YES, lets force a sync job. Do not need to remove warning marker here as it is automatically removed when the sync job is run by this script
  353.       echo "Number of warning(s) ($SYNC_WARN_COUNT) has reached/exceeded threshold ($SYNC_WARN_THRESHOLD). Forcing a SYNC job to run. [`date`]"
  354.       DO_SYNC=1
  355.     else
  356.       # NO, so let's increment the warning count and skip the sync job
  357.       ((SYNC_WARN_COUNT += 1))
  358.       echo $SYNC_WARN_COUNT > $SYNC_WARN_FILE
  359.       echo "$((SYNC_WARN_THRESHOLD - SYNC_WARN_COUNT)) warning(s) till forced sync. NOT proceeding with SYNC job. [`date`]"
  360.       DO_SYNC=0
  361.     fi
  362.   else
  363.     # NO, so let's skip SYNC
  364.     echo "Forced sync is not enabled. Check $TMP_OUTPUT for details. NOT proceeding with SYNC job. [`date`]"
  365.     DO_SYNC=0
  366.   fi
  367. }
  368.  
  369. function chk_zero(){
  370.   echo "###SnapRAID TOUCH [`date`]"
  371.   echo "Checking for zero sub-second files."
  372.   TIMESTATUS=$($SNAPRAID_BIN status | grep 'You have [1-9][0-9]* files with zero sub-second timestamp\.' | sed 's/^You have/Found/g')
  373.   if [ -n "$TIMESTATUS" ]; then
  374.     echo "$TIMESTATUS"
  375.     echo "Running TOUCH job to timestamp. [`date`]"
  376.     $SNAPRAID_BIN touch
  377.     close_output_and_wait
  378.     output_to_file_screen
  379.     echo "TOUCH finished [`date`]"
  380.   else
  381.     echo "No zero sub-second timestamp files found."
  382.   fi
  383. }
  384.  
  385. function service_array_setup() {
  386.   if [ -z "$SERVICES" ]; then
  387.     echo "Please configure serivces"
  388.   else
  389.     echo "Setting up service array"
  390.     read -a service_array <<<$SERVICES
  391.   fi
  392. }
  393.  
  394. function stop_services(){
  395.   for i in ${service_array[@]}; do
  396.     echo "Pausing Service - ""${i^}";
  397.     docker pause $i
  398.   done
  399. }
  400.  
  401. function restore_services(){
  402.   for i in ${service_array[@]}; do
  403.     echo "Unpausing Service - ""${i^}";
  404.     docker unpause $i
  405.   done
  406.  
  407.   if [ $GRACEFUL -eq 1 ]; then
  408.     return
  409.   fi
  410.  
  411.   clean_desc
  412.  
  413.   exit
  414. }
  415.  
  416. function clean_desc(){
  417.   # Cleanup file descriptors
  418.   exec 1>&{out} 2>&{err}
  419.  
  420.   # If interactive shell restore output
  421.   [[ $- == *i* ]] && exec &>/dev/tty
  422. }
  423.  
  424. function prepare_mail() {
  425.   if [ $CHK_FAIL -eq 1 ]; then
  426.     if [ $DEL_COUNT -gt $DEL_THRESHOLD -a $DO_SYNC -eq 0 ]; then
  427.       MSG="Deleted Files ($DEL_COUNT) / ($DEL_THRESHOLD) Violation"
  428.     fi
  429.  
  430.     if [ $DEL_COUNT -gt $DEL_THRESHOLD -a $UPDATE_COUNT -gt $UP_THRESHOLD -a $DO_SYNC -eq 0 ]; then
  431.       MSG="$MSG & "
  432.     fi
  433.  
  434.     if [ $UPDATE_COUNT -gt $UP_THRESHOLD -a $DO_SYNC -eq 0 ]; then
  435.       MSG="$MSG Changed Files ($UPDATE_COUNT) / ($UP_THRESHOLD) Violation"
  436.     fi
  437.  
  438.     SUBJECT="[WARNING] $SYNC_WARN_COUNT - ($MSG) $EMAIL_SUBJECT_PREFIX"
  439.   elif [ -z "${JOBS_DONE##*"SYNC"*}" -a -z "$(grep -w "SYNC_JOB-" $TMP_OUTPUT)" ]; then
  440.     # Sync ran but did not complete successfully so lets warn the user
  441.     SUBJECT="[WARNING] SYNC job ran but did not complete successfully $EMAIL_SUBJECT_PREFIX"
  442.   elif [ -z "${JOBS_DONE##*"SCRUB"*}" -a -z "$(grep -w "SCRUB_JOB-" $TMP_OUTPUT)" ]; then
  443.     # Scrub ran but did not complete successfully so lets warn the user
  444.     SUBJECT="[WARNING] SCRUB job ran but did not complete successfully $EMAIL_SUBJECT_PREFIX"
  445.   else
  446.     SUBJECT="[COMPLETED] $JOBS_DONE Jobs $EMAIL_SUBJECT_PREFIX"
  447.   fi
  448. }
  449.  
  450. function send_mail(){
  451.   # Format for markdown
  452.   sed_me "s/$/  /" "$TMP_OUTPUT"
  453.   $MAIL_BIN -s "$SUBJECT" "$EMAIL_ADDRESS" < $TMP_OUTPUT
  454. }
  455.  
  456. #Due to how process substitution and newer bash versions work, this function closes the output stream, which stops wait from hanging on the tee process.
  457. #If we do not do this and use normal 'wait' the processes will wait forever as newer bash version will wait for the process substitution to finish.
  458. #Probably not the best way of 'fixing' this issue. Someone with more knowledge can provide better insight.
  459. function close_output_and_wait(){
  460.   exec >&$out 2>&$err
  461.   wait $(pgrep -P "$$")
  462. }
  463.  
  464. # Redirects output to file and screen. Open a new tee process.
  465. function output_to_file_screen(){
  466.   # redirect all output to screen and file
  467.   exec {out}>&1 {err}>&2
  468.   # NOTE: Not preferred format but valid: exec &> >(tee -ia "${TMP_OUTPUT}" )
  469.   exec > >(tee -a "${TMP_OUTPUT}") 2>&1
  470. }
  471.  
  472. # Set TRAP
  473. trap restore_services INT EXIT
  474.  
  475. main "$@"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement