triclops200

Xkcd_download_script.rb

Mar 26th, 2012
282
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Ruby 2.90 KB | None | 0 0
  1. require 'socket'
  2. require 'cgi'
  # Host name and TCP port of the server the comic pages are requested from;
  # both are handed to TCPSocket.open further down.
  3. @host = "xkcd.com"
  4. @port = 80
  # Request path used for the initial front-page fetch in the script body.
  5. path = "/"
  # Sends +request_text+ to +host+:+port+ over a fresh TCP connection and
  # returns everything read until the server closes the connection.
  # The block form of TCPSocket.open closes the socket even when an
  # exception is raised.
  def fetch_raw_response(host, port, request_text)
    TCPSocket.open(host, port) do |sock|
      sock.print(request_text)
      sock.read
    end
  end

  # Downloads one comic: fetches the page for comic number +i+, scrapes the
  # image name, title and mouse-over text out of the HTML, then saves the
  # image to ./imgs/<i>.<ext> and the mouse-over text to ./txts/<i>.txt.
  #
  # i      - comic number; also used as the index into +table+.
  # errors - Array that receives an [i, message] pair for every failure.
  # max    - highest comic number; only used in the progress message.
  # table  - Array that receives the unescaped comic title at index +i+.
  #
  # Returns true when both image and mouse-over text were obtained, false
  # otherwise (in which case at least one entry was appended to +errors+).
  #
  # NOTE(review): the numeric offsets below are tied to the exact page markup
  # the script was written against — confirm against the current HTML.
  def logic(i, errors, max, table)
    page = fetch_raw_response(@host, @port, request("/" + i.to_s + "/"))
    _headers, body = page.split("\r\n\r\n", 2)

    # Position of the last extension letter followed by `" t`, presumably the
    # start of the img tag's title attribute.
    title_attr_at = /[gf]" t/ =~ body

    # Text from just after the first "cs/" up to the end of the file extension.
    img = body[((/cs\// =~ body) + 3)..title_attr_at]
    # If markup leaked into the slice, restart after the src=" attribute;
    # 33 skips `src="` plus what appears to be the image URL prefix.
    img = img[((/src="/ =~ img) + 33)..-1] if img.count(">") > 0
    imgfmt = img[-3..-1]

    ctitle = body[((/ctitle/ =~ body) + 8)..((/comicNav/ =~ body) - 19)]
    table[i] = CGI.unescapeHTML(ctitle)
    # The pasted source replaced ' with ' (a no-op); the replacement was most
    # likely the &#39; entity, decoded by the page the paste was copied from.
    # Rubies whose CGI.escapeHTML already escapes ' are unaffected by this.
    ctitle = CGI.escapeHTML(ctitle).gsub("'", "&#39;")

    funnytext = nil
    text_ok = true
    begin
      # The mouse-over text sits between the title attribute and the quoted
      # comic title that follows it.
      alt_at = /"#{Regexp.escape ctitle}"/ =~ body
      funnytext = CGI.unescapeHTML(body[(title_attr_at + 10)..(alt_at - 7)])
    rescue StandardError
      errors << [i, "Mouse Over Text Not Downloaded, please download manually"]
      text_ok = false
    end

    puts "Downloading #{img} to #{i}.#{imgfmt} (#{i}/#{max})"
    img_response = fetch_raw_response("imgs.xkcd.com", 80, request2("/comics/" + img))
    _img_headers, imgdata = img_response.split("\r\n\r\n", 2)

    # Binary mode keeps the image bytes intact on platforms that translate
    # newlines; the block form closes the file even if the write fails.
    File.open("./imgs/" + i.to_s + "." + imgfmt, "wb") { |f| f.write(imgdata) }
    # An empty text file is still written when the mouse-over text is missing,
    # exactly as before, so the comic counts as processed on later runs.
    File.open("./txts/" + i.to_s + ".txt", "w") { |f| f.write(funnytext) }

    text_ok
  rescue StandardError
    errors << [i, "Image NOT Downloaded, please download manually"]
    false
  end
  # Builds the raw text of an HTTP/1.0 GET request for +path+.
  #
  # path - request target, e.g. "/" or "/614/".
  # host - optional host name sent in a Host header; defaults to the
  #        script-level @host. When nil, no Host header is emitted and the
  #        result is the bare two-line request the method used to return.
  #
  # A Host header lets servers that host several sites on one address route
  # the request to the right one.
  #
  # Returns the request as a String terminated by a blank line.
  def request(path, host = @host)
    host_header = host ? "Host: #{host}\r\n" : ""
    "GET #{path} HTTP/1.0\r\n#{host_header}\r\n"
  end
  # Builds the raw text of an HTTP/1.1 GET request for +path+ on the image
  # host imgs.xkcd.com.
  #
  # "Connection: close" asks the server to close the connection after the
  # response. Callers read the socket until EOF, so without it the read
  # blocks until the server's keep-alive timeout expires.
  #
  # Returns the request as a String terminated by a blank line.
  def request2(path)
    "GET #{path} HTTP/1.1\r\nHost:imgs.xkcd.com\r\nConnection: close\r\n\r\n"
  end
  # --- Driver script -------------------------------------------------------
  # Finds the newest comic number from the front page, downloads every comic
  # that has no ./txts/<n>.txt yet (in batches of concurrent threads), retries
  # the failures once, and saves the title table to strips.db.

  # Create the output directories on first run (skipped when anything with
  # that name already exists).
  Dir.mkdir("imgs") unless File.exist?("./imgs")
  Dir.mkdir("txts") unless File.exist?("./txts")

  # Fetch the front page; the block form closes the socket afterwards.
  front_page = TCPSocket.open(@host, @port) do |sock|
    sock.print(request(path))
    sock.read
  end
  _headers, body = front_page.split("\r\n\r\n", 2)

  # The newest comic number sits between the permalink prefix and "/</h3>".
  # 46 is the length of "Permanent link to this comic: http://xkcd.com/".
  link_start = /Permanent\ link\ to\ this\ comic:\ http:\/\/xkcd.com\// =~ body
  link_end = /\/\<\/h3\>$/ =~ body
  max = body[(link_start + 46)..(link_end - 1)].to_i

  errors = []

  # Title table from a previous run, stored as the text of an Array literal.
  table_source = File.exist?("strips.db") ? File.read("strips.db") : "[]"
  # NOTE(review): eval runs whatever strips.db contains as Ruby code. That is
  # only acceptable while the file is produced solely by this script; consider
  # a data-only format before trusting a strips.db from anywhere else.
  table = eval table_source

  # Number of comics downloaded concurrently per batch.
  numt = 45

  # Walk comics 1..max in batches of numt, one thread per comic, waiting for
  # each batch to finish before starting the next.
  (1..max).each_slice(numt) do |batch|
    workers = batch.map do |comic|
      Thread.new(comic) do |i|
        # Number 404 is skipped on purpose, as are comics already on disk.
        next if i == 404 || File.exist?("./txts/" + i.to_s + ".txt")

        logic(i, errors, max, table)
      end
    end
    workers.each(&:join)
  end

  # Give every failed comic one more attempt and report those that fail again.
  errors.each do |failed|
    retry_errors = []
    next if logic(failed[0], retry_errors, max, table)

    puts "Error in downloading #{retry_errors[0][0]}: #{retry_errors[0][1]}"
  end

  # Slot 0 of the table records the newest comic number seen.
  table[0] = max
  File.open("strips.db", "w") { |db| db.write(table.inspect) }
Add Comment
Please, Sign In to add comment