Guest User

Exhentai downloader script

a guest
Jul 25th, 2019
3,326
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Bash 3.48 KB | None | 0 0
  1. #!/bin/bash
  2.  
  3. PAGE="$1"
  4. if [[ -z "$PAGE" ]]; then
  5.     echo "e-hentai.org downloader - usage: $0 base-gallery-url|--resume [cookies.txt]"
  6.     echo "May be a link to the FIRST page of a gallery or the FIRST image in a gallery"
  7.     echo "Cookies.txt file only needed for exhentai, to make pandas happy"
  8.     echo "    if not specified and needed, ~/ehcookie.txt will be used if it exists"
  9.     echo "If page is '--resume', it continues the previous run in the current dir"
  10.     exit 2
  11. fi
  12.  
  13. UA='Mozilla/5.0 (Windows NT 6.3; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0'
  14. REF=''
  15.  
  16. NUM=1
  17. #are we resuming? (basically click Back, F5, and continue working)
  18. if [[ "$PAGE" == "--resume" ]]; then
  19.     #delete the most recent imagepage file
  20.     FILE="`ls i*.htm | sort -Vr | head -n 1`"
  21.     mv "$FILE" "old_${FILE}"
  22.     #reparse previous file
  23.     FILE="`ls i*.htm | sort -Vr | head -n 1`"
  24.     if [[ -z "$FILE" ]]; then
  25.         echo "There doesn't seem to be anything reasonable to resume here!"
  26.         exit 1
  27.     fi
  28.     PAGE="`hxselect 'div.sni a' < $FILE | hxnormalize | grep -m1 -B2 'https\?://\(ehgt.org/g\|exhentai.org/img\)/n.png' | grep 'href=' | tail -n 1 | sed -e 's/.*href="\([^"]*\)".*/\1/'`"
  29.     if [[ -z "$PAGE" ]]; then
  30.         echo "Error parsing next page URL from $FILE"
  31.         exit 1
  32.     fi
  33.     NUM="${FILE%.htm}"
  34.     NUM="${NUM#i}"
  35.     NUM=$((NUM + 1))
  36.     echo "Will continue from page $NUM: $PAGE"
  37.     echo "Press enter to start operation"
  38. fi
  39.  
  40. #do we need a cookie (exhentai)
  41. COOKIE='/dev/null'
  42. if [[ "$PAGE" == *exhentai.org/* ]]; then
  43.     [[ -z "$2" ]] && COOKIE=~/ehcookie.txt || COOKIE="$2"
  44.     echo "EXHENTAI DETECTED, USING COOKIE FILE $COOKIE"
  45.     TMPFILE="`mktemp`"
  46.     wget --load-cookies "$COOKIE" --user-agent="$UA" "http://exhentai.org/" -O "$TMPFILE" || exit 1
  47.     PANDA="`od -N 4 -t x1 < "$TMPFILE" | head -n 1`"
  48.     rm -f "$TMPFILE"
  49.     if [[ "$PANDA" == "0000000 ff d8 ff e0" ]]; then
  50.         #got a jpg, not an HTML page; assume it's a picture of a sad panda
  51.         echo ">>> Your cookie makes the panda sad. Please fix ${COOKIE}."
  52.         exit 1
  53.     fi
  54.     echo ">>> The panda is happy. We're good to go!"
  55. fi
  56.  
  57. #is this a /g/ link?
  58. if [[ "$PAGE" == */g/* ]]; then
  59.     echo "$PAGE"
  60.     echo "Gallery link - fetching and finding first image page"
  61.     wget --load-cookies "$COOKIE" --user-agent="$UA" --referer="$REF" "$PAGE" -O galbase.htm || exit 1
  62.     REF="$PAGE"
  63.     PAGE="`hxnormalize < galbase.htm | grep -m1 'https\?://\(g.e-\|ex\)hentai.org/s/' | sed -e 's/.*href="\([^"]*\)".*/\1/'`"
  64.     if [[ -z "$PAGE" ]]; then
  65.         echo "Error parsing first image link from galbase.htm"
  66.         exit 1
  67.     fi
  68.     echo "First image link: $PAGE"
  69. fi
  70.  
  71. while true; do
  72.     echo "Page ${NUM}: $PAGE"
  73.     wget --load-cookies "$COOKIE" --user-agent="$UA" --referer="$REF" "$PAGE" -O i${NUM}.htm || exit 1
  74.     #extract image link
  75.     IMG="`hxselect 'a img' < i${NUM}.htm | hxnormalize | grep src='"' | grep -v '"https\?://ehgt.org/\|"https\?://exhentai.org/img/' | sed -e 's/.*src="\([^"]*\)".*/\1/'`"
  76.     if [[ -z "$IMG" ]]; then
  77.         echo "Error parsing image link from i${NUM}.htm"
  78.         exit 1
  79.     fi
  80.     wget --load-cookies "$COOKIE" --user-agent="$UA" --referer="$PAGE" "$IMG" || exit 1
  81.     #extract next page link
  82.     REF="$PAGE"
  83.     NEXT="`hxselect 'div.sni a' < i${NUM}.htm | hxnormalize | grep -m1 -B2 'https\?://\(ehgt.org/g\|exhentai.org/img\)/n.png' | grep 'href=' | tail -n 1 | sed -e 's/.*href="\([^"]*\)".*/\1/'`"
  84.     if [[ -z "$NEXT" ]]; then
  85.         echo "Error parsing next page URL from i${NUM}.htm"
  86.         exit 1
  87.     fi
  88.     [[ "$PAGE" == "$NEXT" ]] && break
  89.     PAGE="$NEXT"
  90.     NUM=$(( NUM + 1 ))
  91. done
  92.  
  93. echo "All done, last page was $NUM"
  94. mkdir html
  95. mv *.htm html
  96. exit 0
Add Comment
Please, Sign In to add comment