Advertisement
Guest User

Untitled

a guest
Feb 24th, 2020
175
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.89 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. import scrapy
  3.  
  4.  
  5. class BusinessSpider(scrapy.Spider):
  6.     name = 'business'
  7.     allowed_domains = ['www.businesslist.com.ng']
  8.     start_urls = ['https://www.businesslist.com.ng/browse-business-cities']
  9.  
  10.     def parse(self, response):
  11.         businesses = response.xpath("//div[@class='box']/ul/li/a")
  12.         for business in businesses:
  13.             city = business.xpath('.//text()').get()
  14.             link = business.xpath('.//@href').get() #you had an error here you called businesses instead of business
  15.             yield response.follow(url=link, callback=self.parse_business, meta={'city_name': city})
  16.  
  17.     def parse_business(self, response):
  18.         city = response.request.meta['city_name']
  19.         rows = response.xpath(
  20.             "//div[@id='listings']/div[@class='company with_img g_0']")
  21.  
  22.         for row in rows:
  23.             business_name = row.xpath(".//h4/a/text()").get()
  24.             address = row.xpath(".//div[@class='address']/text()").get()
  25.             city = row.xpath(".//div[@class='address']/a[1]/text()").get()
  26.             state = row.xpath(".//div[@class='address']/a[2]/text()").get()
  27.             latitude = row.xpath(".//a/@data-ltd").get()
  28.             longitude = row.xpath(".//a/@data-lng").get()
  29.  
  30.             yield {'city': city,
  31.                    'business_name': business_name,
  32.                    'address': address,
  33.                    'city': city,
  34.                    'state': state,
  35.                    'latitude': latitude,
  36.                    'longitude': longitude}
  37.  
  38.         next_page = response.xpath(
  39.             "//div[@class='pages_container']/a[@class='pages_arrow'][position()=last()]/@href").get()
  40.         if next_page:
  41.             yield response.follow(url=next_page, callback=self.parse_business, meta={'city_name': city}) #the callback method here should be parse_business and you should also send city_name as request meta
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement