Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import re
- import scrapy
- from vesseltracker.items import VesseltrackerItem
class GetVessel(scrapy.Spider):
    """Spider that reads the MarineTraffic port listing for flag ``AE``.

    For every data row of the listing table it records the port name in a
    ``VesseltrackerItem`` and follows the link found in the row's seventh
    cell, carrying the item along in the request ``meta``.
    """

    name = "getvessel"
    allowed_domains = ["marinetraffic.com"]
    start_urls = [
        'http://www.marinetraffic.com/en/ais/index/ports/all/flag:AE',
    ]

    def parse(self, response):
        """Yield one follow-up request (or bare item) per listing row.

        Args:
            response: The listing page response.

        Yields:
            A ``scrapy.Request`` for the row's link with the row's item in
            ``meta['item']``, or the item itself when the row has no link.
        """
        # position()>1 skips the first <tr>, which is the table header.
        for row in response.xpath('//table/tr[position()>1]'):
            # A fresh item per row: a single shared instance would be
            # overwritten by later rows before earlier callbacks run.
            item = VesseltrackerItem()
            item['port_name'] = row.xpath('td[2]/a/text()').extract()

            # extract() returns a list, which urljoin() cannot handle;
            # extract_first() returns one string, or None when absent.
            # NOTE(review): td[7] is presumably the port homepage link
            # (per the original variable name) - confirm against the page.
            port_href = row.xpath('td[7]/a/@href').extract_first()
            if not port_href:
                # Nothing to follow; emit what was collected for this row.
                yield item
                continue

            yield scrapy.Request(
                response.urljoin(port_href),
                callback=self.parse_port,
                meta={'item': item},
            )

    def parse_port(self, response):
        """Emit the item that ``parse`` attached to the request.

        Args:
            response: The response for the followed port link.

        Yields:
            The ``VesseltrackerItem`` stored under ``meta['item']``.
        """
        # Fields scraped from the port page itself can be added to the
        # item here before it is yielded.
        yield response.meta['item']
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement