fant0men

hdd_dump.sh

Nov 22nd, 2019 (edited)
90
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/bin/bash
  2.  
  3. # This script will look for all files in the directory given as first
  4. # argument, sort them from smallest to largest, and put that list in an array.
  5. # We will then go through that array and copy each file one by one to the output
  6. # directory. The script will check the MD5 hashes of all the files to avoid
  7. # copying duplicates (in order to save space in the output directory).
  8.  
  9. # This script can be useful when dumping the content of failing hard drives or
  10. # broken partitions. The script outputs a list of files that were copied, and a
  11. # list of files that couldn't be copied, in the user's home directory.
  12.  
  13. # Since the script copies the smallest files first, the highest possible number
  14. # of files will be copied (preferably all of the files). This is because smaller
  15. # files are faster to read / write, and there's statistically a smaller chance
  16. # of a bad block / sector hitting a small file. By copying the smaller files
  17. # first, if the hard drive really is about to fail, the largest possible number
  18. # of files will be copied.
  19.  
  20. # If the script has problems reading a file, it will try a maximum of 10
  21. # times: up to 5 times to check the MD5 hash, and up to 5 times to copy the file.
  22.  
  23. # Permissions and modification dates of the input files are preserved in the
  24. # output files by the script.
  25.  
  # A failure in any stage of a pipeline makes the whole pipeline fail.
  set -o pipefail

  # md5s:        MD5 hashes of the files handled so far (used by 'md5copy').
  # cp_switch:   set by 'md5copy'; '0' when a file was skipped as a duplicate.
  # exit_status: set by 'md5copy'; non-zero when hashing or copying failed.
  declare -A md5s
  declare cp_switch exit_status

  # If the script isn't run with sudo / root privileges, then quit.
  if (( EUID != 0 )); then
      printf 'You need to be root to run this script!\n\n' >&2
      exit 1
  fi

  # The input directory must exist, and an output directory must be named.
  if [[ ! -d $1 || -z $2 ]]; then
      printf 'Usage: %s [in_dir] [out_dir]\n' "${0##*/}" >&2
      exit 1
  fi

  session="$RANDOM"

  # The logs go in the home directory of the user who invoked the script
  # (the sudo caller if there is one), instead of a hard-coded path. If that
  # directory can't be determined, fall back to $HOME.
  log_user="${SUDO_USER:-$(id -un)}"
  log_home=$(getent passwd "$log_user" | cut -d: -f6)

  if [[ ! -d $log_home ]]; then
      log_home="${HOME:-/root}"
  fi

  cp_log="${log_home}/hdd_dump_copied-${session}.txt"
  error_log="${log_home}/hdd_dump_errors-${session}.txt"
  in_dir=$(readlink -f -- "$1")

  mkdir -p -- "$2" || exit

  out_dir=$(readlink -f -- "$2")

  #used=$(du --summarize --block-size=1 "$in_dir" | tr '[:space:]' ' ' | cut -d' ' -f1)
  #free=$(df --output=avail --block-size=1 "$out_dir" | tail -n +2 | tr -d '[:space:]')

  # NOTE: the two assignments above are commented out, so 'used' and 'free'
  # are normally unset and this check compares 0 with 0 (it never triggers).
  # It only becomes active if those lines are re-enabled.
  if [[ ${used:-0} -gt ${free:-0} ]]; then
      diff=$(( used - free ))

      printf 'Not enough free space in:\n%s\n\nDifference is %s bytes.\n' \
          "$out_dir" "$diff" >&2

      exit 1
  fi
  68.  
  # The 'md5copy' function checks the MD5 hash of the input file, and tries to
  # copy the file, unless a file with the same hash was already copied.
  #
  # Arguments:
  #   $1 - path of the input file.
  # Globals read:
  #   of          - path of the output file (set by the caller).
  # Globals written:
  #   md5s        - associative array (must be declared with 'declare -A' by
  #                 the caller); gets an entry for each hash that was copied.
  #   cp_switch   - '1' normally, '0' if the file was skipped as a duplicate.
  #   exit_status - '0' if the file was copied or skipped as a duplicate,
  #                 non-zero if hashing or copying failed after all attempts.
  # Outputs:
  #   "copying: <file>... done" or "copying: <file>... error" on stdout.
  # Returns:
  #   Always 0; the outcome is reported through the globals above.
  md5copy () {
      local src="$1"
      local -r max_tries=5 retry_delay=10
      local md5_line md5_if
      local n=0

      exit_status=1
      cp_switch=1

      while (( exit_status != 0 && n < max_tries )); do
          n=$(( n + 1 ))

          # Capture the whole output line, so that the status is md5sum's own
          # and doesn't depend on 'pipefail' being set by the caller.
          md5_line=$(md5sum -b -- "$src" 2>/dev/null)
          exit_status=$?

          if (( exit_status != 0 && n < max_tries )); then
              sleep "$retry_delay"
          fi
      done

      # The hash is everything before the first space. md5sum puts a
      # backslash in front of the hash when the file name contains a backslash
      # or newline; strip it so that identical content always gives the same key.
      md5_if="${md5_line%% *}"
      md5_if="${md5_if#\\}"

      if (( exit_status != 0 )); then
          return 0
      fi

      if [[ -z $md5_if ]]; then
          # No hash despite a zero status: treat it as a read failure.
          exit_status=1
          return 0
      fi

      if [[ ${md5s[$md5_if]} == 1 ]]; then
          cp_switch=0
          return 0
      fi

      exit_status=1
      n=0

      # The file name is passed as an argument, never as the format string,
      # so names containing '%' or '\' are printed as they are.
      printf 'copying: %s... ' "$src"

      while (( exit_status != 0 && n < max_tries )); do
          n=$(( n + 1 ))

          cp -p -- "$src" "$of" 2>/dev/null
          exit_status=$?

          if (( exit_status != 0 && n < max_tries )); then
              sleep "$retry_delay"
          fi
      done

      if (( exit_status == 0 )); then
          # Only remember the hash once the copy has succeeded. Otherwise a
          # failed copy would make every later file with the same content be
          # skipped as a duplicate, and that content would never be saved.
          md5s[$md5_if]=1
          echo 'done'
      else
          echo 'error'

          # Don't leave a partially written output file behind.
          if [[ -f $of ]]; then
              rm -f -- "$of" 2>/dev/null
          fi
      fi

      return 0
  }
  130.  
  # Prints every regular file below the directory given as $1, sorted from
  # smallest to largest, as a NUL-delimited list of paths. NUL delimiters keep
  # file names containing newlines intact, and an empty directory produces no
  # output at all.
  list_files_by_size () {
      find "$1" -type f -printf '%s\t%p\0' \
          | LC_ALL=C sort -z -n \
          | cut -z -f2-
  }

  touch -- "$cp_log" "$error_log"

  mapfile -d '' -t files < <(list_files_by_size "$in_dir")

  # Prefix to remove from each input path to get the path relative to the
  # input directory. Stripping a trailing slash first makes this work when the
  # input directory is '/'. The prefix is quoted in the expansion below, so it
  # is matched literally, no matter which characters it contains.
  in_prefix="${in_dir%/}/"

  for line in "${files[@]}"; do
      rel="${line#"$in_prefix"}"
      of="${out_dir}/${rel}"
      dir_cp="${of%/*}"

      mkdir -p -- "$dir_cp" || exit

      # Files that already exist in the output directory are left alone.
      if [[ ! -f $of ]]; then
          md5copy "$line"

          # Copied files go in the copy log, failed files in the error log.
          # Duplicates (exit_status 0, cp_switch 0) are logged nowhere.
          if [[ $exit_status -eq 0 && $cp_switch -eq 1 ]]; then
              printf '%s\n' "$line" >> "$cp_log"
          elif [[ $exit_status -ne 0 ]]; then
              printf '%s\n' "$line" >> "$error_log"
          fi
      fi
  done
  161.  
  162.  
RAW Paste Data