Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/bin/bash
# e-hentai.org gallery downloader.
# Fetches every image page of a gallery in sequence, downloads the
# full-size image from each, and can resume an interrupted run.
PAGE="$1"
if [[ -z "$PAGE" ]]; then
  # Usage/diagnostics go to stderr; exit 2 signals a usage error.
  echo "e-hentai.org downloader - usage: $0 base-gallery-url|--resume [cookies.txt]" >&2
  echo "May be a link to the FIRST page of a gallery or the FIRST image in a gallery" >&2
  echo "Cookies.txt file only needed for exhentai, to make pandas happy" >&2
  echo " if not specified and needed, ~/ehcookie.txt will be used if it exists" >&2
  echo "If page is '--resume', it continues the previous run in the current dir" >&2
  exit 2
fi
# Spoof a desktop-browser User-Agent for every request.
UA='Mozilla/5.0 (Windows NT 6.3; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0'
REF=''   # Referer header for the next request; updated as we walk pages
NUM=1    # current image-page number, used for the i<NUM>.htm filenames
#are we resuming? (basically click Back, F5, and continue working)
if [[ "$PAGE" == "--resume" ]]; then
  #locate the most recent image-page file; bail out BEFORE touching anything
  #if there is nothing here to resume (the original mv'd an empty name first)
  FILE="$(ls i*.htm 2>/dev/null | sort -Vr | head -n 1)"
  if [[ -z "$FILE" ]]; then
    echo "There doesn't seem to be anything reasonable to resume here!"
    exit 1
  fi
  #rename the most recent imagepage file away - it may be incomplete
  mv "$FILE" "old_${FILE}"
  #reparse the previous (now most recent) file
  FILE="$(ls i*.htm 2>/dev/null | sort -Vr | head -n 1)"
  if [[ -z "$FILE" ]]; then
    echo "There doesn't seem to be anything reasonable to resume here!"
    exit 1
  fi
  #the "next page" link is the anchor nearest the n.png arrow icon
  PAGE="$(hxselect 'div.sni a' < "$FILE" | hxnormalize | grep -m1 -B2 'https\?://\(ehgt.org/g\|exhentai.org/img\)/n.png' | grep 'href=' | tail -n 1 | sed -e 's/.*href="\([^"]*\)".*/\1/')"
  if [[ -z "$PAGE" ]]; then
    echo "Error parsing next page URL from $FILE"
    exit 1
  fi
  #derive the page number from the filename: i<NUM>.htm -> NUM, then +1
  NUM="${FILE%.htm}"
  NUM="${NUM#i}"
  NUM=$((NUM + 1))
  echo "Will continue from page $NUM: $PAGE"
  echo "Press enter to start operation"
  read -r   #actually pause for the user (the original prompt never waited)
fi
#do we need a cookie (exhentai)
COOKIE='/dev/null'
if [[ "$PAGE" == *exhentai.org/* ]]; then
  #explicit if/else instead of the fragile `a && b || c` chain
  if [[ -n "$2" ]]; then
    COOKIE="$2"
  else
    COOKIE=~/ehcookie.txt
  fi
  echo "EXHENTAI DETECTED, USING COOKIE FILE $COOKIE"
  #fail early with a clear message instead of letting wget choke on it
  if [[ ! -r "$COOKIE" ]]; then
    echo "Cookie file $COOKIE is missing or unreadable" >&2
    exit 1
  fi
  TMPFILE="$(mktemp)"
  #probe the front page; clean up the temp file on the failure path too
  wget --load-cookies "$COOKIE" --user-agent="$UA" "http://exhentai.org/" -O "$TMPFILE" || { rm -f "$TMPFILE"; exit 1; }
  #first four bytes tell us whether we got HTML or a JPEG
  PANDA="$(od -N 4 -t x1 < "$TMPFILE" | head -n 1)"
  rm -f "$TMPFILE"
  if [[ "$PANDA" == "0000000 ff d8 ff e0" ]]; then
    #got a jpg, not an HTML page; assume it's a picture of a sad panda
    echo ">>> Your cookie makes the panda sad. Please fix ${COOKIE}."
    exit 1
  fi
  echo ">>> The panda is happy. We're good to go!"
fi
#is this a /g/ gallery link rather than a direct image page?
if [[ "$PAGE" == */g/* ]]; then
  echo "$PAGE"
  echo "Gallery link - fetching and finding first image page"
  #save the gallery base page, then pull the first /s/ image-page link out of it
  wget --load-cookies "$COOKIE" --user-agent="$UA" --referer="$REF" "$PAGE" -O galbase.htm || exit 1
  REF="$PAGE"
  PAGE="$(hxnormalize < galbase.htm \
    | grep -m1 'https\?://\(g.e-\|ex\)hentai.org/s/' \
    | sed -e 's/.*href="\([^"]*\)".*/\1/')"
  if [[ -z "$PAGE" ]]; then
    echo "Error parsing first image link from galbase.htm"
    exit 1
  fi
  echo "First image link: $PAGE"
fi
#main download loop: fetch each image page, download its full-size image,
#then follow the "next" arrow until it points back at the current page.
while true; do
  echo "Page ${NUM}: $PAGE"
  #save the image page as i<NUM>.htm (also what --resume parses later)
  wget --load-cookies "$COOKIE" --user-agent="$UA" --referer="$REF" "$PAGE" -O i${NUM}.htm || exit 1
  #extract image link: take <a><img> src attributes, excluding site-chrome
  #images hosted on ehgt.org / exhentai.org/img (thumbnails, UI sprites)
  IMG="`hxselect 'a img' < i${NUM}.htm | hxnormalize | grep src='"' | grep -v '"https\?://ehgt.org/\|"https\?://exhentai.org/img/' | sed -e 's/.*src="\([^"]*\)".*/\1/'`"
  if [[ -z "$IMG" ]]; then
    echo "Error parsing image link from i${NUM}.htm"
    exit 1
  fi
  #download the actual image; the referer must be the page it was found on
  wget --load-cookies "$COOKIE" --user-agent="$UA" --referer="$PAGE" "$IMG" || exit 1
  #extract next page link: the anchor wrapping the n.png "next" arrow icon
  REF="$PAGE"
  NEXT="`hxselect 'div.sni a' < i${NUM}.htm | hxnormalize | grep -m1 -B2 'https\?://\(ehgt.org/g\|exhentai.org/img\)/n.png' | grep 'href=' | tail -n 1 | sed -e 's/.*href="\([^"]*\)".*/\1/'`"
  if [[ -z "$NEXT" ]]; then
    echo "Error parsing next page URL from i${NUM}.htm"
    exit 1
  fi
  #on the last page the "next" arrow links to itself - that is the stop signal
  [[ "$PAGE" == "$NEXT" ]] && break
  PAGE="$NEXT"
  NUM=$(( NUM + 1 ))
done
echo "All done, last page was $NUM"
#stash the fetched HTML pages out of the way; -p so a pre-existing html/
#directory (e.g. left over from a resumed run) is not a fatal error
mkdir -p html
mv -- *.htm html/
exit 0
Add Comment
Please, Sign In to add comment