Advertisement
Not a member of Pastebin yet? Sign up — it unlocks many cool features!
- import scrapy
- #import time
class QuotesSpider(scrapy.Spider):
    """Crawl shop.hbsfc.co.il category pages and yield one item per product.

    Flow:
      start page -> parse (collect /category links)
                 -> parse_attr (per category: scrape, or follow pagination)
                 -> parse_single (per pagination page: scrape)

    Each scraped item is a dict: {'item_id': <str|None>, 'price': <str|None>}.
    """

    name = "products"
    BASE_URL = 'https://shop.hbsfc.co.il/'

    def start_requests(self):
        # Single entry point: the shop's front page.
        yield scrapy.Request(url=self.BASE_URL, callback=self.parse)

    def parse(self, response):
        # Collect unique category links from the front page; set() drops
        # duplicate anchors pointing at the same category.
        links = set(
            response.xpath('//a[starts-with(@href, "/category")]/@href').extract()
        )
        self.logger.info("Num of links: %d", len(links))
        for link in links:
            # response.urljoin avoids the double slash that BASE_URL + link
            # produced (BASE_URL ends with '/', link starts with '/').
            yield scrapy.Request(response.urljoin(link), callback=self.parse_attr)

    def _extract_products(self, response):
        # Shared by parse_single and parse_attr: yield one dict per
        # product anchor on the page.
        for product in response.xpath('//a[@class="ee_product_click"]'):
            yield {
                'item_id': product.xpath('@ee_list_itemid').get(),
                'price': product.xpath('@ee_list_itemprice').get(),
            }

    def parse_single(self, response):
        # A pagination sub-page: scrape only when it has no further
        # pagination links of its own (mirrors the original guard).
        pages = response.xpath('//a[contains(@class, "page-link")]/@href').extract()
        if not pages:
            for item in self._extract_products(response):
                yield item

    def parse_attr(self, response):
        # A category page: scrape directly when there is no pagination,
        # otherwise follow every pagination link.
        pages = response.xpath('//a[contains(@class, "page-link")]/@href').extract()
        self.logger.debug("pagination links: %s", pages)
        if not pages:
            for item in self._extract_products(response):
                yield item
            return
        for page in pages:
            # urljoin handles both absolute and relative hrefs; the original
            # passed the raw href, which raises ValueError for relative URLs.
            yield scrapy.Request(response.urljoin(page), callback=self.parse_single)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement