#!/system/bin/sh
# If you are going to run this script on a common Linux machine,
# change the shebang to #!/bin/sh
# Introducing shitgrabber!
# This script downloads all of an author's public pastes from Pastebin.
### USAGE: ###
# shitgrabber.sh url_to_pastebin_author_page
# Example for Android:
# sh ./shitgrabber.sh http://pastebin.com/u/foobar
### CONFIG: ###
# Where to download the main Pastebin author page
MAIN_HTM="./main.htm"
# Dir to place books. The script will create a subdir named after the author there.
BOOKS_DIR=/sdcard/Books
# Absolute path to some temporary file
CURL_LIST="/sdcard/scripts/curl.list"
# I know this script is a load of shit, but it was written for personal use,
# so fuck^W make it better yourself. I don't care.
URL=$1
#http://pastebin.com/u/Bastinator/2
AUTHOR=`echo "$URL" | awk '{sub(/http:\/\//, "", $0); \
    split($0, token, "/"); \
    print token[3]}'`
# param $1 - path to raw html file from Pastebin
# return - csv list, format: <url>;<title>
htm2csv() {
    RAW_LIST=$(grep -i "^.*class=\"i_p0\".*</td>" "$1")
    CSV_LIST=$(echo "$RAW_LIST" | \
        awk '{sub(/^.*href="/, "", $0); \
            sub(/<\/a>.*$/, "", $0); \
            sub(/">/, ";", $0); \
            print $0}')
    echo "$CSV_LIST"
}
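# Sketch of what htm2csv does, assuming the pastebin.com markup this
# script was written against. A hypothetical paste-list row like:
#   <td><img class="i_p0" ... /><a href="/AbCd1234">My Title</a></td>
# comes out as:
#   /AbCd1234;My Title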
# param $1 - path to raw html file from Pastebin
# return - pagination URLs, one per line
getLinkList() {
    grep -i "<div class=\"pagination\".*</div>" "$1" |\
    awk '{gsub(/<\/a>/, "</a>\n", $0); print $0}' |\
    awk '{ \
        sub(/.*table><div class="pagination">.*<\/a>$/, "", $0); \
        sub(/<a href=.*Oldest.*<\/a>/, "", $0); \
        sub(/<\/?div.*div>/, "", $0); \
        sub(/<a href="/, "http://pastebin.com", $0); \
        sub(/">.*<\/a>/, "", $0); \
        print $0}' |\
    sed '/^[[:space:]]*$/d'
}
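# Hypothetical output, assuming an author page with three pages of pastes:
#   http://pastebin.com/u/foobar/2
#   http://pastebin.com/u/foobar/3
# The "Oldest" link and the div markup are stripped; only the extra page
# URLs survive, one per line, ready to feed to curl via xargs.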
# param $1 - csv list; prefixes each title with a descending line number
numerise() {
    ln=$(wc -l < "$1")
    mv "$1" "$1.old"
    awk -F ";" -v i="$ln" '{print $1";"i--"_"$2}' "$1.old" > "$1"
}
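# Sketch of the transformation on a hypothetical two-line list:
#   /AbCd1234;My Title    ->  /AbCd1234;2_My Title
#   /EfGh5678;Other One   ->  /EfGh5678;1_Other One
# Pastebin lists newest pastes first, so counting down keeps the
# downloaded files sortable in posting order.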
# param $1 - one csv line (<url>;<title>)
# return - wget arguments: <download_url> -O <sanitised_title>.txt
getArgs() {
    echo "$1" | \
    awk -F ';' \
        '{sub(/^\//, "http://pastebin.com/download.php?i=", $1); \
        gsub(/[ \/\;\"\#]/, "_", $2); \
        print $1" -O "$2".txt"}'
}
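# E.g. getArgs "/AbCd1234;2_My Title" (a hypothetical line) prints:
#   http://pastebin.com/download.php?i=AbCd1234 -O 2_My_Title.txt
# Spaces and shell-hostile characters in the title become underscores,
# so the unquoted `getArgs` substitution in the main loop splits cleanly
# into exactly the two arguments wget expects.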
### Main ###
wget "$URL" -O "$MAIN_HTM"
# Fetch every pagination page and append it to the main page,
# so htm2csv sees the author's full paste list in one file.
getLinkList "$MAIN_HTM" > "$CURL_LIST"
xargs curl < "$CURL_LIST" >> "$MAIN_HTM"
htm2csv "$MAIN_HTM" > "$CURL_LIST"
numerise "$CURL_LIST"
mkdir -p "$BOOKS_DIR/$AUTHOR"
cd "$BOOKS_DIR/$AUTHOR" || exit 1
while read -r line; do
    wget `getArgs "$line"`
done < "$CURL_LIST"
echo "Done, enjoy ^^"
exit 0