Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/bin/bash
# e-hentai.org gallery downloader.
# Fetches every image page of a gallery in sequence, downloads the
# full-size image from each, and can resume an interrupted run.
PAGE="$1"
if [[ -z "$PAGE" ]]; then
  # Usage/diagnostics go to stderr; exit 2 signals a usage error.
  echo "e-hentai.org downloader - usage: $0 base-gallery-url|--resume [cookies.txt]" >&2
  echo "May be a link to the FIRST page of a gallery or the FIRST image in a gallery" >&2
  echo "Cookies.txt file only needed for exhentai, to make pandas happy" >&2
  echo " if not specified and needed, ~/ehcookie.txt will be used if it exists" >&2
  echo "If page is '--resume', it continues the previous run in the current dir" >&2
  exit 2
fi
# Spoof a desktop-browser User-Agent for every request.
UA='Mozilla/5.0 (Windows NT 6.3; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0'
REF=''   # Referer header for the next request; updated as we walk pages
NUM=1    # current image-page number, used for the i<NUM>.htm filenames
#are we resuming? (basically click Back, F5, and continue working)
if [[ "$PAGE" == "--resume" ]]; then
  #locate the most recent image-page file; bail out BEFORE touching anything
  #if there is nothing here to resume (the original mv'd an empty name first)
  FILE="$(ls i*.htm 2>/dev/null | sort -Vr | head -n 1)"
  if [[ -z "$FILE" ]]; then
    echo "There doesn't seem to be anything reasonable to resume here!"
    exit 1
  fi
  #rename the most recent imagepage file away - it may be incomplete
  mv "$FILE" "old_${FILE}"
  #reparse the previous (now most recent) file
  FILE="$(ls i*.htm 2>/dev/null | sort -Vr | head -n 1)"
  if [[ -z "$FILE" ]]; then
    echo "There doesn't seem to be anything reasonable to resume here!"
    exit 1
  fi
  #the "next page" link is the anchor nearest the n.png arrow icon
  PAGE="$(hxselect 'div.sni a' < "$FILE" | hxnormalize | grep -m1 -B2 'https\?://\(ehgt.org/g\|exhentai.org/img\)/n.png' | grep 'href=' | tail -n 1 | sed -e 's/.*href="\([^"]*\)".*/\1/')"
  if [[ -z "$PAGE" ]]; then
    echo "Error parsing next page URL from $FILE"
    exit 1
  fi
  #derive the page number from the filename: i<NUM>.htm -> NUM, then +1
  NUM="${FILE%.htm}"
  NUM="${NUM#i}"
  NUM=$((NUM + 1))
  echo "Will continue from page $NUM: $PAGE"
  echo "Press enter to start operation"
  read -r   #actually pause for the user (the original prompt never waited)
fi
#do we need a cookie (exhentai)
COOKIE='/dev/null'
if [[ "$PAGE" == *exhentai.org/* ]]; then
  #explicit if/else instead of the fragile `a && b || c` chain
  if [[ -n "$2" ]]; then
    COOKIE="$2"
  else
    COOKIE=~/ehcookie.txt
  fi
  echo "EXHENTAI DETECTED, USING COOKIE FILE $COOKIE"
  #fail early with a clear message instead of letting wget choke on it
  if [[ ! -r "$COOKIE" ]]; then
    echo "Cookie file $COOKIE is missing or unreadable" >&2
    exit 1
  fi
  TMPFILE="$(mktemp)"
  #probe the front page; clean up the temp file on the failure path too
  wget --load-cookies "$COOKIE" --user-agent="$UA" "http://exhentai.org/" -O "$TMPFILE" || { rm -f "$TMPFILE"; exit 1; }
  #first four bytes tell us whether we got HTML or a JPEG
  PANDA="$(od -N 4 -t x1 < "$TMPFILE" | head -n 1)"
  rm -f "$TMPFILE"
  if [[ "$PANDA" == "0000000 ff d8 ff e0" ]]; then
    #got a jpg, not an HTML page; assume it's a picture of a sad panda
    echo ">>> Your cookie makes the panda sad. Please fix ${COOKIE}."
    exit 1
  fi
  echo ">>> The panda is happy. We're good to go!"
fi
#is this a /g/ gallery link rather than a direct image page?
if [[ "$PAGE" == */g/* ]]; then
  echo "$PAGE"
  echo "Gallery link - fetching and finding first image page"
  #save the gallery base page, then pull the first /s/ image-page link out of it
  wget --load-cookies "$COOKIE" --user-agent="$UA" --referer="$REF" "$PAGE" -O galbase.htm || exit 1
  REF="$PAGE"
  PAGE="$(hxnormalize < galbase.htm \
    | grep -m1 'https\?://\(g.e-\|ex\)hentai.org/s/' \
    | sed -e 's/.*href="\([^"]*\)".*/\1/')"
  if [[ -z "$PAGE" ]]; then
    echo "Error parsing first image link from galbase.htm"
    exit 1
  fi
  echo "First image link: $PAGE"
fi
#main download loop: fetch each image page, download its full-size image,
#then follow the "next" arrow until it points back at the current page.
while true; do
  echo "Page ${NUM}: $PAGE"
  #save the image page as i<NUM>.htm (also what --resume parses later)
  wget --load-cookies "$COOKIE" --user-agent="$UA" --referer="$REF" "$PAGE" -O i${NUM}.htm || exit 1
  #extract image link: take <a><img> src attributes, excluding site-chrome
  #images hosted on ehgt.org / exhentai.org/img (thumbnails, UI sprites)
  IMG="`hxselect 'a img' < i${NUM}.htm | hxnormalize | grep src='"' | grep -v '"https\?://ehgt.org/\|"https\?://exhentai.org/img/' | sed -e 's/.*src="\([^"]*\)".*/\1/'`"
  if [[ -z "$IMG" ]]; then
    echo "Error parsing image link from i${NUM}.htm"
    exit 1
  fi
  #download the actual image; the referer must be the page it was found on
  wget --load-cookies "$COOKIE" --user-agent="$UA" --referer="$PAGE" "$IMG" || exit 1
  #extract next page link: the anchor wrapping the n.png "next" arrow icon
  REF="$PAGE"
  NEXT="`hxselect 'div.sni a' < i${NUM}.htm | hxnormalize | grep -m1 -B2 'https\?://\(ehgt.org/g\|exhentai.org/img\)/n.png' | grep 'href=' | tail -n 1 | sed -e 's/.*href="\([^"]*\)".*/\1/'`"
  if [[ -z "$NEXT" ]]; then
    echo "Error parsing next page URL from i${NUM}.htm"
    exit 1
  fi
  #on the last page the "next" arrow links to itself - that is the stop signal
  [[ "$PAGE" == "$NEXT" ]] && break
  PAGE="$NEXT"
  NUM=$(( NUM + 1 ))
done
echo "All done, last page was $NUM"
#stash the fetched HTML pages out of the way; -p so a pre-existing html/
#directory (e.g. left over from a resumed run) is not a fatal error
mkdir -p html
mv -- *.htm html/
exit 0
Add Comment
Please, Sign In to add comment