#!/system/bin/sh
# If you are going to run this script on a common Linux machine,
# change the shebang to #!/bin/sh
# Introducing shitgrabber!
# This script downloads all of an author's public pastes from Pastebin.
### USAGE: ###
# shitgrabber.sh url_to_pastebin_author_page
# Example for Android:
# sh ./shitgrabber.sh http://pastebin.com/u/foobar
### CONFIG: ###
# Where to download the main Pastebin author page
MAIN_HTM="./main.htm"
# Dir to place books. The script will create a subdir named after the author there.
BOOKS_DIR=/sdcard/Books
# Absolute path to some temporary file
CURL_LIST="/sdcard/scripts/curl.list"
# I know this script is a load of shit, but it was written for personal use,
# so fuck^W make it better yourself. I don't care.
URL=$1
#http://pastebin.com/u/Bastinator/2
AUTHOR=`echo "$URL" | awk '{sub(/http:\/\//, "", $0); \
    split($0, token, "/"); \
    print token[3]}'`
# param $1 - path to raw html file from Pastebin
# return - csv list, format: <url>;<title>
htm2csv() {
    RAW_LIST=$(grep -i "^.*class=\"i_p0\".*</td>" "$1")
    CSV_LIST=$(echo "$RAW_LIST" | \
        awk '{sub(/^.*href="/, "", $0); \
            sub(/<\/a>.*$/, "", $0); \
            sub(/">/, ";", $0); \
            print $0}')
    echo "$CSV_LIST"
}
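# Sketch of what htm2csv does, assuming the pastebin.com markup this
# script was written against. A hypothetical paste-list row like:
#   <td><img class="i_p0" ... /><a href="/AbCd1234">My Title</a></td>
# comes out as:
#   /AbCd1234;My Title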
# param $1 - path to raw html file from Pastebin
# return - pagination URLs, one per line
getLinkList() {
    grep -i "<div class=\"pagination\".*</div>" "$1" |\
    awk '{gsub(/<\/a>/, "</a>\n", $0); print $0}' |\
    awk '{ \
        sub(/.*table><div class="pagination">.*<\/a>$/, "", $0); \
        sub(/<a href=.*Oldest.*<\/a>/, "", $0); \
        sub(/<\/?div.*div>/, "", $0); \
        sub(/<a href="/, "http://pastebin.com", $0); \
        sub(/">.*<\/a>/, "", $0); \
        print $0}' |\
    sed '/^[[:space:]]*$/d'
}
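# Hypothetical output, assuming an author page with three pages of pastes:
#   http://pastebin.com/u/foobar/2
#   http://pastebin.com/u/foobar/3
# The "Oldest" link and the div markup are stripped; only the extra page
# URLs survive, one per line, ready to feed to curl via xargs.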
# param $1 - csv list; prefixes each title with a descending line number
numerise() {
    ln=$(wc -l < "$1")
    mv "$1" "$1.old"
    awk -F ";" -v i="$ln" '{print $1";"i--"_"$2}' "$1.old" > "$1"
}
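# Sketch of the transformation on a hypothetical two-line list:
#   /AbCd1234;My Title    ->  /AbCd1234;2_My Title
#   /EfGh5678;Other One   ->  /EfGh5678;1_Other One
# Pastebin lists newest pastes first, so counting down keeps the
# downloaded files sortable in posting order.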
# param $1 - one csv line (<url>;<title>)
# return - wget arguments: <download_url> -O <sanitised_title>.txt
getArgs() {
    echo "$1" | \
    awk -F ';' \
        '{sub(/^\//, "http://pastebin.com/download.php?i=", $1); \
        gsub(/[ \/\;\"\#]/, "_", $2); \
        print $1" -O "$2".txt"}'
}
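# E.g. getArgs "/AbCd1234;2_My Title" (a hypothetical line) prints:
#   http://pastebin.com/download.php?i=AbCd1234 -O 2_My_Title.txt
# Spaces and shell-hostile characters in the title become underscores,
# so the unquoted `getArgs` substitution in the main loop splits cleanly
# into exactly the two arguments wget expects.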
### Main ###
wget "$URL" -O "$MAIN_HTM"
# Fetch every pagination page and append it to the main page,
# so htm2csv sees the author's full paste list in one file.
getLinkList "$MAIN_HTM" > "$CURL_LIST"
xargs curl < "$CURL_LIST" >> "$MAIN_HTM"
htm2csv "$MAIN_HTM" > "$CURL_LIST"
numerise "$CURL_LIST"
mkdir -p "$BOOKS_DIR/$AUTHOR"
cd "$BOOKS_DIR/$AUTHOR" || exit 1
while read -r line; do
    wget `getArgs "$line"`
done < "$CURL_LIST"
echo "Done, enjoy ^^"
exit 0