Guest User

Untitled

a guest
May 22nd, 2020
25
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.03 KB | None | 0 0
  1. import scrapy
  2. from scrapy.exceptions import CloseSpider
  3.  
  4. # from scrapy.loader import ItemLoader
  5. # from tutorial.items import TutorialItem
  6.  
  7.  
  8. class TutorialItem(scrapy.Item):
  9. # define the fields for your item here like:
  10. Tags = scrapy.Field()
  11. Authors = scrapy.Field()
  12. Text = scrapy.Field()
  13.  
  14.  
  15. class QuotesSpider(scrapy.Spider):
  16. name = "quotes"
  17. result = TutorialItem()
  18. urls = ['http://quotes.toscrape.com/page/1/']
  19.  
  20. def start_requests(self):
  21. num = 1
  22. for url in self.urls:
  23. num += 1
  24. self.urls.append(str(self.urls[0][:-2] + str(num) + '/'))
  25. yield scrapy.Request(url=url, callback=self.parse)
  26.  
  27.  
  28. def _tags_parse(self, tags):
  29. for i in tags:
  30. if 'Tags' not in QuotesSpider.result:
  31. QuotesSpider.result['Tags'] = []
  32. QuotesSpider.result['Tags'].append(i)
  33.  
  34. def _authors_parse(self, authors):
  35. for i in authors:
  36. if 'Authors' not in QuotesSpider.result:
  37. QuotesSpider.result['Authors'] = []
  38. QuotesSpider.result['Authors'].append(i)
  39.  
  40. def _text_parse(self, text):
  41. for i in text:
  42. if 'Text' not in QuotesSpider.result:
  43. QuotesSpider.result['Text'] = []
  44. QuotesSpider.result['Text'].append(i)
  45.  
  46. def _content_checker(self, content):
  47. if len(content) == 0:
  48. raise CloseSpider('No more pages with contents!')
  49.  
  50.  
  51. def parse(self, response):
  52. tags = set(response.xpath('//a[@class="tag"]//text()').getall())
  53. authors = set(response.xpath('//small[@class="author"]//text()').getall())
  54. text = response.xpath('//div[@class="col-md-8"]//span[@class="text"]//text()').getall()
  55. QuotesSpider._content_checker(self, authors)
  56. QuotesSpider._tags_parse(self, tags)
  57. QuotesSpider._authors_parse(self, authors)
  58. QuotesSpider._text_parse(self, text)
  59. with open('tags.txt', 'w') as f:
  60. f.write(str(QuotesSpider.result))
Advertisement
Add Comment
Please, Sign In to add comment