Advertisement
Guest User

Untitled

a guest
Feb 8th, 2016
50
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.70 KB | None | 0 0
  1. #!/bin/bash
  2.  
  3.  
  4. ##################################################################
  5. #
  6. # This script uploads files on hdfs directory to s3 RECURSIVELY
  7. #
  8. # usage: hdf2s3.sh <source hdfs dir> <s3://your bucket>
  9. #
  10. ##################################################################
  11. set -eu
  12.  
  13. if [ $# -ne 2 ]; then
  14. echo " usage: hdf2s3.sh <source hdfs dir> <s3://your bucket>"
  15. exit 1
  16. fi
  17.  
  18. if [[ ! $2 =~ ^s3:\/\/.* ]]; then
  19. echo "s3 URI should start with s3://"
  20. exit 1
  21. fi
  22. directory_to_upload=$1
  23. target_bucket=$2
  24. target_bucket=${target_bucket/\/$//}
  25. target_bucket=$(echo $target_bucket | sed 's/s3:\/\///g')
  26.  
  27. tmp_folder=$(mktemp -d /tmp/$(basename $0).XXXXXX) || exit 1
  28. uploadfile_list=$(hdfs dfs -ls -R $directory_to_upload | grep -v drw | tr -s ' '| cut -d' ' -f8)
  29. uploadfile_count=$(printf '%s\n' "${uploadfile_list[@]}" | wc -l )
  30. counter=0
  31. echo TOTAL FILES TO UPLOAD = ${uploadfile_count}
  32.  
  33.  
  34. for i in ${uploadfile_list}; do
  35. filename=$(basename ${i})
  36. dirname=$(dirname ${i})
  37. dest=$target_bucket/$dirname/$filename
  38. dest=$(echo $dest | sed 's/\/\//\//g')
  39. dest=$(echo $dest | sed 's/\/\//\//g')
  40. dest=s3://$dest
  41. let counter=counter+1
  42.  
  43. echo "##### Uploading ${counter}/${uploadfile_count} #####"
  44.  
  45. target_dir_ls=$(s3cmd ls --recursive $dest | wc -l)
  46. if [ $target_dir_ls -gt 0 ]; then
  47. echo "The file $dest already exists. Skipping..."
  48. continue
  49. fi
  50.  
  51. #echo "hdfs dfs -get ${i} ${tmp_folder}";
  52. hdfs dfs -get ${i} ${tmp_folder}/;
  53.  
  54. echo s3cmd --quiet put ${tmp_folder}/${filename} $dest
  55. s3cmd --quiet put ${tmp_folder}/${filename} $dest
  56.  
  57. #echo rm ${tmp_folder}/${filename};
  58. rm ${tmp_folder}/${filename};
  59. done
  60.  
  61. rmdir ${tmp_folder}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement