
# Untitled
# By: a guest on May 5th, 2012 | syntax: None | size: 1.27 KB | hits: 10 | expires: Never
require 'hpricot'
require 'iconv'
require 'open-uri'
# Scrapes one Microfox3000 product page and stores it as a Product.
#
# A product already stored under the page URL is returned as-is, without
# converting or parsing the page and without saving it again. Otherwise the
# markup is converted from LATIN-9 to UTF-8, parsed with Hpricot, and a new
# Product is built from the title, description and price elements and saved.
#
# Working values are plain locals, so nothing from one call lingers on the
# receiver for the next call.
#
# NOTE(review): the result of Shop.find_by_name("Microfox3000") is not checked
# for nil before +id+ is called on it; the shop record is assumed to exist.
#
# @param page [#doc, #url] crawled page; both values are converted with +to_s+
# @return [Product, nil] the stored or newly saved product, nil when saving fails
def parse_microfox(page)
  page_url = page.url.to_s

  existing = Product.find_by_page_url(page_url)
  if existing
    puts "Product already fetched."
    return existing
  end

  doc = Hpricot(Iconv.conv('utf-8', 'LATIN-9', page.doc.to_s))

  # Title and description: inner HTML of the matching paragraphs.
  title = doc.search("//p.modello").inner_html
  # Original author note: "remove the market price" (not done by this code).
  description = doc.search("//p.descrizione_generale").inner_html

  # Price: second <span> of the price paragraph, second whitespace-separated
  # token of its content. Either may be missing, which leaves price_text nil.
  price_span = doc.search("//p.prezzo_menoiva/span")[1]
  price_text = price_span && price_span.inner_html.split[1]

  product = Product.new
  product.name = title
  product.description = microfox_plain_text(description)
  product.price = microfox_price(price_text)
  product.page_url = page_url
  product.shop_id = Shop.find_by_name("Microfox3000").id

  product.save ? product : nil
end

# Reduces an HTML fragment to plain text: drops CR/LF/TAB characters and tags,
# then trims. Trimming happens last so whitespace that was sheltered by a
# leading or trailing tag is removed as well.
def microfox_plain_text(html)
  html.gsub(/[\r\n\t]/, "").gsub(/<\/?[^>]*>/, "").strip
end

# Converts a price written with "." as thousands separator and "," as decimal
# mark (e.g. "1.234,56") to a Float; returns 0 when no price text was found.
def microfox_price(text)
  return 0 if text.nil?

  text.delete(".").tr(",", ".").to_f
end