Not a member of Pastebin yet? Sign up — it unlocks many cool features!
- import scrapy
- from scrapy.exceptions import CloseSpider
- # from scrapy.loader import ItemLoader
- # from tutorial.items import TutorialItem
class TutorialItem(scrapy.Item):
    """Container for the scraped quote data.

    The spider fills each field with a list of strings:
    Tags (tag labels), Authors (author names), Text (quote bodies).
    """

    Tags = scrapy.Field()
    Authors = scrapy.Field()
    Text = scrapy.Field()
class QuotesSpider(scrapy.Spider):
    """Crawl quotes.toscrape.com page by page, accumulating tags, authors,
    and quote texts into one shared TutorialItem and dumping it to tags.txt.

    The crawl ends when a page yields no authors (see _content_checker).
    """

    name = "quotes"
    # Shared accumulator for everything scraped across all pages.
    # NOTE(review): class-level mutable state survives across spider runs
    # in the same process — confirm this is intended.
    result = TutorialItem()
    urls = ['http://quotes.toscrape.com/page/1/']

    def start_requests(self):
        """Lazily yield one request per page: /page/1/, /page/2/, ...

        Scrapy consumes this generator on demand, so the otherwise
        unbounded page sequence is cut short when parse() raises
        CloseSpider.  (Replaces the original pattern of appending to
        self.urls while iterating it, which also grew the list forever.)
        """
        base = self.urls[0][:-2]  # strip trailing "1/" -> ".../page/"
        page = 1
        while True:
            yield scrapy.Request(url=base + str(page) + '/', callback=self.parse)
            page += 1

    def _tags_parse(self, tags):
        """Append every tag in *tags* to result['Tags'] (created on first use)."""
        if 'Tags' not in self.result:
            self.result['Tags'] = []
        self.result['Tags'].extend(tags)

    def _authors_parse(self, authors):
        """Append every author in *authors* to result['Authors']."""
        if 'Authors' not in self.result:
            self.result['Authors'] = []
        self.result['Authors'].extend(authors)

    def _text_parse(self, text):
        """Append every quote in *text* to result['Text']."""
        if 'Text' not in self.result:
            self.result['Text'] = []
        self.result['Text'].extend(text)

    def _content_checker(self, content):
        """Stop the whole crawl once a page has no scraped content."""
        if not content:
            raise CloseSpider('No more pages with contents!')

    def parse(self, response):
        """Extract tags, authors, and quote texts from one page, then
        persist the cumulative result to tags.txt.

        Raises CloseSpider (via _content_checker) on an empty page.
        """
        tags = set(response.xpath('//a[@class="tag"]//text()').getall())
        authors = set(response.xpath('//small[@class="author"]//text()').getall())
        text = response.xpath('//div[@class="col-md-8"]//span[@class="text"]//text()').getall()
        self._content_checker(authors)
        self._tags_parse(tags)
        self._authors_parse(authors)
        self._text_parse(text)
        # 'w' mode deliberately rewrites the file each page with the
        # cumulative result accumulated so far.
        with open('tags.txt', 'w') as f:
            f.write(str(self.result))
Advertisement
Add Comment
Please sign in to add a comment.