Guest User

Untitled

a guest
May 24th, 2018
85
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.60 KB | None | 0 0
  # Namespace holding a simple web crawler and a small helper for querying
  # Microsoft Access databases through ADO (WIN32OLE).
  module Crawler
    # Crawls pages starting from a seed URL and hands every parsed document to
    # the caller's block.
    #
    # NOTE(review): @urls_to_crawl, @urls_status and the helpers
    # download_resource, extract_urls and add_new_urls are not defined in this
    # excerpt; they are assumed to be provided elsewhere (e.g. the class being
    # reopened in another file). @urls_to_crawl must respond to
    # write([:url, uri]) and take([:url, nil]).
    class WebCrawler
      # Queues +start_url+ as the first URL and starts crawling.
      #
      # @param start_url [String] address of the first page to fetch
      # @yield [doc] the value produced by Hpricot(file) for each page that was
      #   downloaded successfully
      def start(start_url, &block)
        @urls_to_crawl.write([:url, URI(start_url)])
        crawl(&block)
      end

      private

      # Main crawl loop: takes the next queued URL, marks it in @urls_status,
      # downloads and parses it, yields the document, then queues the URLs
      # extracted from it. Runs until the enclosing +loop+ is terminated.
      def crawl
        loop do
          url = @urls_to_crawl.take([:url, nil])[1]
          @urls_status[url.to_s] = true

          doc = download_resource(url) { |file| Hpricot(file) }
          # A falsy result means there is nothing to process for this URL.
          next unless doc

          yield doc

          time_begin = Time.now
          add_new_urls(extract_urls(doc, url))
          # Was `AccessDb('data.dta')`, which calls an undefined *method* named
          # AccessDb and raises NoMethodError; instantiate the class instead.
          # NOTE(review): the instance is neither opened nor used yet.
          AccessDb.new('data.dta')
          puts "Elapsed: #{Time.now - time_begin}"
        end
      end
    end

    # Thin wrapper around an ADODB connection to a Jet (.mdb) database.
    #
    # Typical use: AccessDb.new(path), #open, then #query / #execute, and
    # finally #close. Results of the last #query are exposed through #fields
    # and #data.
    class AccessDb
      # Leading part of the OLE DB connection string; the database path is
      # appended to it in #open.
      CONNECTION_PREFIX = 'Provider=Microsoft.Jet.OLEDB.4.0;Data Source='.freeze

      attr_accessor :mdb, :connection, :data, :fields

      # @param mdb [String, nil] path of the database file
      def initialize(mdb = nil)
        @mdb = mdb
        @connection = nil
        @data = nil
        @fields = nil
      end

      # Creates an ADODB.Connection and opens it on the configured database.
      #
      # Raises TypeError when no database path has been set (mdb is nil).
      def open
        # Build a fresh string instead of appending to a literal, so this also
        # works when string literals are frozen.
        connection_string = CONNECTION_PREFIX + @mdb
        @connection = WIN32OLE.new('ADODB.Connection')
        @connection.Open(connection_string)
      end

      # Runs +sql+ and stores the column names in #fields and the rows in #data
      # (one array per record).
      #
      # @param sql [String] statement to run on the open connection
      # @return [Array] the rows that were stored in #data
      def query(sql)
        recordset = WIN32OLE.new('ADODB.Recordset')
        recordset.Open(sql, @connection)
        begin
          @fields = []
          recordset.Fields.each { |field| @fields << field.Name }
          @data =
            begin
              # GetRows is column-major; transpose to get one array per record.
              recordset.GetRows.transpose
            rescue StandardError
              # Best effort kept from the original code: any failure while
              # fetching rows (presumably an empty recordset) means "no rows".
              []
            end
        ensure
          # Release the recordset even when reading the fields raised.
          recordset.Close
        end
      end

      # Executes +sql+ directly on the connection and returns its result.
      def execute(sql)
        @connection.Execute(sql)
      end

      # Closes the connection. Does nothing when no connection is open, so it
      # is safe to call more than once.
      def close
        return unless @connection

        result = @connection.Close
        @connection = nil
        result
      end
    end
  end
Add Comment
Please, Sign In to add comment