Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- import scrapy
class BusinessSpider(scrapy.Spider):
    """Crawl businesslist.com.ng city by city and emit one item per listing.

    The crawl starts at the "browse business cities" index page, follows
    every city link found there, and then walks each city's paginated
    listing pages.
    """

    name = 'business'
    allowed_domains = ['www.businesslist.com.ng']
    start_urls = ['https://www.businesslist.com.ng/browse-business-cities']

    def parse(self, response):
        """Follow every city link on the index page.

        The link text is forwarded as ``city_name`` in the request meta so
        ``parse_business`` knows which city it is scraping.
        """
        for anchor in response.xpath("//div[@class='box']/ul/li/a"):
            link = anchor.xpath('.//@href').get()
            if not link:
                # response.follow() raises on a missing URL; an anchor
                # without an href has nothing to crawl anyway.
                continue
            city_name = anchor.xpath('.//text()').get()
            yield response.follow(
                url=link,
                callback=self.parse_business,
                meta={'city_name': city_name},
            )

    def parse_business(self, response):
        """Yield one item per listing on a city page, then follow pagination.

        Each item is a dict with the keys ``city``, ``business_name``,
        ``address``, ``state``, ``latitude`` and ``longitude``. Any field
        whose XPath matches nothing is ``None``, except ``city``, which
        falls back to the ``city_name`` carried in the request meta.
        """
        # Kept under its own name: the per-listing city below must not
        # overwrite it, because it is forwarded to the next page request.
        city_name = response.meta['city_name']

        rows = response.xpath(
            "//div[@id='listings']/div[@class='company with_img g_0']")
        for row in rows:
            listing_city = row.xpath(
                ".//div[@class='address']/a[1]/text()").get()
            yield {
                # Prefer the city printed on the listing; use the browsed
                # city when the listing has no city link.
                'city': listing_city or city_name,
                'business_name': row.xpath(".//h4/a/text()").get(),
                'address': row.xpath(
                    ".//div[@class='address']/text()").get(),
                'state': row.xpath(
                    ".//div[@class='address']/a[2]/text()").get(),
                'latitude': row.xpath(".//a/@data-ltd").get(),
                'longitude': row.xpath(".//a/@data-lng").get(),
            }

        # The last "pages_arrow" link is taken to be the "next page" arrow;
        # when the XPath matches nothing the crawl of this city ends.
        next_page = response.xpath(
            "//div[@class='pages_container']/a[@class='pages_arrow'][position()=last()]/@href").get()
        if next_page:
            yield response.follow(
                url=next_page,
                callback=self.parse_business,
                meta={'city_name': city_name},
            )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement