Advertisement
Guest User

Untitled

a guest
Sep 30th, 2016
70
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.67 KB | None | 0 0
  1. import re
  2.  
  3. import scrapy
  4. from vesseltracker.items import VesseltrackerItem
  5.  
  6.  
  7. class GetVessel(scrapy.Spider):
  8. name = "getvessel"
  9. allowed_domains = ["marinetraffic.com"]
  10. start_urls = [
  11. 'http://www.marinetraffic.com/en/ais/index/ports/all/flag:AE',
  12. ]
  13.  
  14. def parse(self, response):
  15. item = VesseltrackerItem()
  16. for ports in response.xpath('//table/tr[position()>1]'):
  17. item['port_name'] = ports.xpath('td[2]/a/text()').extract()
  18. port_homepage_url = ports.xpath('td[7]/a/@href').extract()
  19. port_homepage_url = response.urljoin(port_homepage_url)
  20. yield scrapy.Request(port_homepage_url, callback=self.parse, meta={'item': item})
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement