#!/bin/bash

# generate some helpful information
if [ "$1" == "help" ]; then
    echo "usage: call the script with at least 1 parameter:"
    echo "the board you want to dump"
    echo ""
    echo "the second parameter is optional and can be used"
    echo "to download the first 'x' pages (from 0 to 15), default 0"
    echo "you can also provide 'all' to dump them all"
    echo ""
    echo "examples:"
    echo "foo.sh p (downloads the first page of /p/)"
    echo "foo.sh p 4 (downloads page 0 through 4)"
    echo "foo.sh p all (downloads every page)"
    exit 0
fi
# TODO check for both arguments and default them to something if an argument is not provided

# parse the arguments
if [ "$1" == "" ]; then
    echo "error: no board given, run '$0 help' for usage" >&2
    exit 1
else
    BOARD="$1"
fi

if [ "$2" == "all" ]; then
    PAGES="15"
elif [ "$2" == "" ]; then
    PAGES="0" # matches the documented default of the first page only
else
    PAGES="$2"
fi
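
# (sketch, not in the original) the TODO above could also reject a
# non-numeric page count before it reaches the download loop:
if ! [[ "$PAGES" =~ ^[0-9]+$ ]]; then
    echo "error: page count must be a number from 0 to 15, or 'all'" >&2
    exit 1
fi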

# ikno, dirty but whatevs
if [ ! -d "$BOARD" ]; then
    mkdir "$BOARD"
fi

cd "$BOARD" || exit 1
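
# (note, not in the original) the test-then-mkdir above could be collapsed
# into one call, since "mkdir -p" is a no-op when the directory exists:
# mkdir -p "$BOARD"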

# remove old temp files (-f so a fresh run does not complain about
# files that do not exist yet)
rm -f urls.tmp
rm -f posts.tmp
rm -f imageurls.tmp
rm -f gchan.html
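
# (sketch, not in the original) fixed temp names collide if two dumps run
# in the same directory; a throwaway mktemp directory with a cleanup trap
# would be safer:
# TMP=$(mktemp -d) && trap 'rm -rf "$TMP"' EXIT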

# build the list of page urls; the bare gallery url is page 0,
# pages 1 through PAGES get appended after it
echo "http://www.furaffinity.net/gallery/$BOARD/" >> urls.tmp
for (( p = 1; p <= PAGES; p++ ))
do
    echo "http://www.furaffinity.net/gallery/$BOARD/$p/" >> urls.tmp
done

# fetch every listed page; -O concatenates them all into one html file
wget -i urls.tmp -O gchan.html -nv

#exit
# parse the html file to get all the submission links: split the html at
# every '<' so each tag sits on its own line, keep the /view/ links, and
# turn the relative hrefs into absolute urls
sed 's/</\n</g' gchan.html | grep '/view/' | sed 's/<a href="/http:\/\/www.furaffinity.net/' | sed 's/">//' | sort -u > posts.tmp
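
# (sketch, not in the original) assuming the gallery links keep the
# /view/<id>/ shape, a single grep -o would do the same extraction:
# grep -o '/view/[0-9]*/' gchan.html | sort -u | sed 's|^|http://www.furaffinity.net|' > posts.tmp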

# wget all the submission pages and concatenate them into 1 single file
wget -i posts.tmp -O gchan.html -nv

# parse all the pages to get the image urls: keep only the 'Download'
# anchors and strip the surrounding markup
sed 's/</\n</g' gchan.html | grep 'Download' | sort -u | sed 's/<a href="//' | sed 's/"> Download//' > imageurls.tmp

# wget ALL DEM IMAGES; -nc skips anything already downloaded
wget -i imageurls.tmp -nc
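
# (sketch, not in the original) to go easier on the server, wget can pause
# between files:
# wget --wait=1 --random-wait -i imageurls.tmp -nc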

# zip all them shit and send them via email
#zip images.zip *
# remove the images after being zipped, takes up a lot of space so...
#rm images/*
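
# (sketch, not in the original) if the zip-and-mail step above is ever
# enabled, one hypothetical way to send the archive, assuming mutt is
# installed and user@example.com stands in for a real recipient:
# zip images.zip ./*
# mutt -s "$BOARD dump" -a images.zip -- user@example.com < /dev/null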

# remove temp files

#rm urls.tmp
#rm posts.tmp
#rm imageurls.tmp
#rm gchan.html

cd ..