#!/system/bin/sh

# If you are going to run this script on a common Linux machine,
# change the shebang to #!/bin/sh

# Introducing shitgrabber!
# This script downloads all of an author's public pastes from pastebin.
### USAGE: ###
# shitgrabber.sh url_to_pastebin_author_page
# Example for Android:
# sh ./shitgrabber.sh http://pastebin.com/u/foobar

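# The helpers below assume wget, curl, grep, awk, sed and xargs are on PATH
# (e.g. provided by BusyBox on Android). A quick sanity check before running:
#   for t in wget curl grep awk sed xargs; do command -v $t >/dev/null || echo "missing: $t"; done
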
### CONFIG: ###
# Where to save the downloaded main pastebin author page
MAIN_HTM="./main.htm"
# Dir to place books. The script will create a subdir there named after the author
BOOKS_DIR=/sdcard/Books
# Absolute path to a temporary file
CURL_LIST="/sdcard/scripts/curl.list"

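# For illustration, with the defaults above and a hypothetical author "foobar",
# the downloaded pastes end up roughly as:
#   /sdcard/Books/foobar/3_Some_Paste_Title.txt
#   /sdcard/Books/foobar/2_Another_Paste.txt
#   /sdcard/Books/foobar/1_Oldest_Paste.txt
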
# I know this script is a load of shit, but it was written for personal purposes,
# so fuck^W make it better yourself. I don't care.

URL=$1
# Extract the author name from a URL like http://pastebin.com/u/Bastinator/2
AUTHOR=`echo "$URL" | awk '{sub(/http:\/\//, "", $0); \
                            split($0, token, "/"); \
                            print token[3]}'`

# param $1 - path to a raw html file from pastebin
# return   - csv list, format: <url>;<title>
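# For illustration (hypothetical paste id and title), one line of that list
# looks like:
#   /AbCdEf12;Some Paste Title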
htm2csv() {
    RAW_LIST=$(grep -i "^.*class=\"i_p0\".*</td>" "$1")
    CSV_LIST=$(echo "$RAW_LIST" | \
        awk '{sub(/^.*href="/, "", $0); \
              sub(/<\/a>.*$/, "", $0); \
              sub(/">/, ";", $0); \
              print $0}')
    echo "$CSV_LIST"
}

# param $1 - path to the raw html of the author page
# return   - list of pagination URLs (one per line) pointing to the other listing pages
getLinkList() {
    grep -i "<div class=\"pagination\".*</div>" "$1" |\
        awk '{gsub(/<\/a>/, "</a>\n", $0); print $0}' |\
        awk '{ \
            sub(/.*table><div class="pagination">.*<\/a>$/, "", $0); \
            sub(/<a href=.*Oldest.*<\/a>/, "", $0); \
            sub(/<\/?div.*div>/, "", $0); \
            sub(/<a href="/, "http://pastebin.com", $0); \
            sub(/">.*<\/a>/, "", $0); \
            print $0}' |\
        sed '/^[[:space:]]*$/d'
}
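
# For illustration (hypothetical author "foobar" with three listing pages),
# getLinkList prints something like:
#   http://pastebin.com/u/foobar/2
#   http://pastebin.com/u/foobar/3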

# param $1 - path to the csv list file (rewritten in place, original kept as $1.old)
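# For illustration (hypothetical entries), numerise turns
#   /AbCdEf12;Newest Paste
#   /GhIjKl34;Oldest Paste
# into
#   /AbCdEf12;2_Newest Paste
#   /GhIjKl34;1_Oldest Paste
# so, assuming pastebin lists newest first, the oldest paste gets number 1.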
numerise() {
    ln=$(wc -l < "$1")

    # mv $1{,.old} relies on brace expansion; spell it out for plain sh
    mv "$1" "$1.old"
    awk -F ";" -v i=$ln '{print $1";"i--"_"$2}' "$1.old" > "$1"
}

# param $1 - one csv line <url>;<title>
# return   - wget arguments: the raw-download URL plus "-O <sanitised title>.txt"
getArgs() {
    echo "$1" | \
    awk -F ';' \
        '{sub(/^\//, "http://pastebin.com/download.php?i=", $1); \
          gsub(/[ \/\;\"\#]/, "_", $2); \
          print $1" -O "$2".txt"}'
}
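
# For illustration (hypothetical id and title), the numbered csv line
#   /AbCdEf12;3_Some Paste
# becomes the wget arguments
#   http://pastebin.com/download.php?i=AbCdEf12 -O 3_Some_Paste.txt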

### Main ###
# Fetch the first listing page, then pull in the remaining pagination pages
wget "$URL" -O "$MAIN_HTM"
getLinkList "$MAIN_HTM" > "$CURL_LIST"
# Only call curl if there actually are extra pages, otherwise it would
# complain about a missing URL
[ -s "$CURL_LIST" ] && xargs curl < "$CURL_LIST" >> "$MAIN_HTM"
# Build the <url>;<title> list and number it
htm2csv "$MAIN_HTM" > "$CURL_LIST"
numerise "$CURL_LIST"

mkdir -p "$BOOKS_DIR/$AUTHOR"
cd "$BOOKS_DIR/$AUTHOR" || exit 1

# Download every paste as a plain text file
while read -r line; do
    wget `getArgs "$line"`
done < "$CURL_LIST"

echo "Done, enjoy ^^"
exit 0