Don't like ads? PRO users don't see any ads ;-)
Guest

Untitled

By: a guest on May 5th, 2012  |  syntax: None  |  size: 1.27 KB  |  hits: 10  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. require 'hpricot'
  2. require 'iconv'
  3. require 'open-uri'
  4.  
  5. def parse_microfox(page)
  6.   doc = Hpricot(Iconv.conv('utf-8', 'LATIN-9', "#{page.doc}"))
  7.  
  8.   @title = ""
  9.   @description = ""
  10.   @price = ""
  11.   @page_url = ""
  12.  
  13.   # GET TITLE
  14.   if @title = doc.search("//p.modello") then
  15.     @title = @title.inner_html
  16.   end
  17.  
  18.   # GET DESCRIPTION
  19.   # rimuovi prezzo di mercato
  20.   if @description = doc.search("//p.descrizione_generale") then
  21.     @description = @description.inner_html
  22.   end
  23.  
  24.   # GET PRICE
  25.   if @price = doc.search("//p.prezzo_menoiva/span")[1] then
  26.     @price = @price.inner_html.split[1]
  27.   end
  28.  
  29.   @page_url = "#{page.url}"
  30.  
  31.   if p = Product.find_by_page_url(@page_url) then
  32.     puts "Product already fetched."
  33.   else
  34.     p = Product.new
  35.     p.name = @title
  36.    
  37.     # STRIP HTLM TAGS
  38.     #remove format tags
  39.     p.description = @description.gsub(/[\r\n\t]/, "").gsub(/^\s+|\s+$/,"").gsub(/<\/?[^>]*>/,  "")
  40.    
  41.     # CONVERT PRICE FORMAT
  42.     if @price.nil? then
  43.       p.price = 0
  44.     else
  45.       #convert price format
  46.       p.price = @price.gsub(".","").gsub(",",".").to_f
  47.     end
  48.    
  49.     p.page_url = @page_url
  50.     p.shop_id = Shop.find_by_name("Microfox3000").id
  51.   end
  52.  
  53.   if p.save then
  54.     return p
  55.   else
  56.     return nil
  57.   end
  58. end