Advertisement
arkanon

xkcd Ripper

Feb 11th, 2011
288
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Bash 2.52 KB | None | 0 0
  1. #!/bin/bash
  2.  
  3. # xkcd Ripper
  4. # <http://xkcd.com/>
  5. #
  6. # Arkanon <arkanon@lsd.org.br>
  7. # 2011/02/12 (Sáb) 01:25:50 (BRD)
  8. # 2010/10/13 (Qua) 00:08:56 (BRS)
  9. # 2010/10/12 (Ter) 23:43:14 (BRS)
  10. #
  11. # The ace in the hole :-p
  12. # <http://recantodasletras.uol.com.br/artigos/69424>
  13. # <http://denilsodelima.blogspot.com/2010/11/como-e-que-se-diz-o-pulo-do-gato-em.html>
  14. #
  15. # echo -e \\x$(printf %x 39)
  16. # output|'
  17. #
  18. # eval "echo -e \"$(echo "&#39;Petit&#39;\nbeing a reference" | sed -r 's:&#([0-9]+);:$(echo -e \\\\x$(printf %x \1)):g')\""
  19. # output|'Petit'
  20. #       |being a reference
  21. #
  22. # lynx -dump -force_html <(echo '&quot;')
  23. # output|   "
  24. #       |
  25.  
  # Require exactly one argument: the number of the comic to fetch.
  # The usage text goes to stderr and the script exits non-zero so that
  # callers can tell a usage error apart from a successful run.
  if [[ $# -ne 1 ]]; then
    printf 'Usage: %s <issue-number>\n' "$(basename -- "$0")" >&2
    exit 1
  fi

  # Download the comic page into $out (wget -q: quiet, -O-: write to stdout).
  # A failed download is reported on stderr with a non-zero exit status;
  # a bare `exit` here would return the status of the preceding echo, i.e. 0.
  out=$(wget -qO- "http://xkcd.com/$1") || { echo "not found" >&2; exit 1; }
  33.  
  # extract_fields: read a comic page (HTML) on stdin and print the fields
  # this script reports, one per output line, in the order the matching
  # lines appear in the page:
  #   - from a line containing <h1>…</h1>: the heading text
  #   - from a line matching /comics/…title: the image file name, then the
  #     raw (still entity-encoded) value of the img title attribute
  #
  # The page text is only ever handled as data. It must never be passed
  # through eval: it comes from the network, and any $(...) or backticks in
  # it would be executed by the shell.
  extract_fields() {
    grep -e "<h1>" -e "/comics/.*title" \
      | sed -r '
          # heading line: keep only the text between <h1> and </h1>
          s: *<h1>([^<]*)</h1>.*:\1:g
          # image line: drop everything up to the last "/" of the src URL
          s:.*/([^/]*)":\1:
          # put the title attribute value on a line of its own
          s:" title=":\n:
          # discard the closing quote and whatever follows it
          s:".*::
        '
  }

  data=$(printf '%s\n' "$out" | extract_fields)

  # Split the three output lines into title, image file name and title text.
  t1=$(sed -n '1p' <<<"$data")
  i1=$(sed -n '2p' <<<"$data")
  l1=$(sed -n '3p' <<<"$data")
  51.  
  # decode_numeric_entities TEXT
  #
  # Print TEXT with every decimal character reference (&#NN;) replaced by
  # the character it names. All other text, including backslashes, quotes,
  # "$" and backticks, is copied through untouched.
  #
  # The text is scanned left to right and decoded output is never rescanned,
  # so "&#38;#39;" yields "&#39;" rather than being decoded twice. The input
  # is treated purely as data: no eval, because TEXT comes from a web page.
  decode_numeric_entities() {
    local rest=$1 decoded='' entity code glyph
    local -r pattern='&#([0-9]+);'

    while [[ $rest =~ $pattern ]]; do
      entity=${BASH_REMATCH[0]}
      code=${BASH_REMATCH[1]}

      # Copy the text in front of the entity, then step past the entity.
      decoded+=${rest%%"$entity"*}
      rest=${rest#*"$entity"}

      # Drop leading zeros so "039" is not read as an octal number.
      while [[ $code == 0?* ]]; do
        code=${code#0}
      done

      # Decode only valid code points (1..0x10FFFF); the length check keeps
      # absurdly long digit strings away from integer arithmetic.
      if [[ ${#code} -le 7 ]] && (( code >= 1 && code <= 0x10FFFF )); then
        if (( code < 128 )); then
          printf -v glyph '%b' "\\x$(printf '%x' "$code")"
        else
          # \U needs bash >= 4.2; the result is encoded per the current locale.
          printf -v glyph '%b' "\\U$(printf '%08x' "$code")"
        fi
        decoded+=$glyph
      else
        # Not a usable code point: keep the reference exactly as written.
        decoded+=$entity
      fi
    done

    printf '%s\n' "$decoded$rest"
  }

  # l2: the img title attribute with its numeric character references decoded.
  l2=$(decode_numeric_entities "$l1")
  61.  
  # trim_spaces: strip leading and trailing spaces from every line on stdin.
  trim_spaces() {
    sed -r -e 's/^ +//' -e 's/ +$//'
  }

  # l3: $l2 rendered by lynx as HTML, which resolves the remaining entities
  # such as &quot; and &lt;. $l2 is quoted and written with printf so that
  # its whitespace is preserved and characters like * or ? are not expanded
  # as file name patterns.
  l3=$(lynx -dump -force_html -width=1024 <(printf '%s\n' "$l2") | trim_spaces)
  63.  
  # print_section LABEL TEXT
  #
  # Print a blank line, LABEL in bold yellow, then TEXT on the next line.
  # TEXT is written with printf '%s' so that values such as "-n" or text
  # containing backslashes are printed verbatim.
  print_section() {
    printf '\n\e[1;33m%s\e[0m\n' "$1"
    printf '%s\n' "$2"
  }

  # Report what was extracted from the downloaded page.
  print_section "Title"                          "$t1"
  print_section "Image filename"                 "$i1"
  print_section "Inside the img title attribute" "$l2"
  print_section "Output"                         "$l3"

  # For comic 859 also print the hard-coded expected values, so the
  # extracted ones can be compared against them by eye.
  if [[ ${1-} == 859 ]]; then

    printf '\n\e[1;32m%s\e[0m\n' "For reference, that's the correct answer"

    print_section "Title" "("
    print_section "Image filename" "(.png"
    print_section "Inside the img title attribute" \
      "Brains aside, I wonder how many poorly-written xkcd.com-parsing scripts will break on this title (or \\\\;;&quot;\''{\&lt;&lt;[' this mouseover text.&quot;"
    print_section "Output" \
      "Brains aside, I wonder how many poorly-written xkcd.com-parsing scripts will break on this title (or \\\\;;\"\''{\<<[' this mouseover text.\""

  fi

  echo
  98.  
  99. # EOF
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement