Advertisement
Guest User

Untitled

a guest
Jan 21st, 2017
119
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.06 KB | None | 0 0
  1. import scrapy
  2.  
  3.  
  class MAL(scrapy.Spider):
      """Scrape name, score and synopsis for each entry of the MyAnimeList top-anime list.

      ``parse`` reads the ranking page and collects the name and score of every
      entry, then requests the entry's own page. ``parse_synopsis`` reads the
      ``og:description`` meta tag of that page and emits the finished item.

      Each yielded item is a dict with the keys ``name``, ``score`` and
      ``synopsis``; any of the values may be ``None`` when the matching
      selector finds nothing.
      """

      name = "mal_spider"
      start_urls = ["https://myanimelist.net/topanime.php"]
      custom_settings = {
          'AUTOTHROTTLE_ENABLED': True,
          'DOWNLOAD_DELAY': 5.0,
      }

      # Selectors applied to the ranking (listing) page.
      ANIME_SELECTOR = '.ranking-list'
      NAME_SELECTOR = 'div a::text'
      SCORE_SELECTOR = 'div span::text'
      ANIME_URL_SELECTOR = 'div a::attr(href)'

      # Selector applied to an individual anime page.
      SYNOPSIS_SELECTOR = '//meta[@property="og:description"]/@content'

      def parse(self, response):
          """Collect name and score per ranking entry and follow its detail page.

          Args:
              response: the downloaded ranking page.

          Yields:
              A ``scrapy.Request`` for each entry's detail page, carrying the
              partially filled item in ``meta['data']``; or, when an entry has
              no link, the item itself with ``synopsis`` set to ``None``.
          """
          for anime in response.css(self.ANIME_SELECTOR):
              data = {
                  'name': anime.css(self.NAME_SELECTOR).extract_first(),
                  'score': anime.css(self.SCORE_SELECTOR).extract_first(),
              }

              # Take the link from this entry only. The synopsis lives on the
              # entry's own page, so it cannot be read from the ranking page:
              # the item is completed in parse_synopsis once that page arrives.
              anime_url = anime.css(self.ANIME_URL_SELECTOR).extract_first()
              if not anime_url:
                  # Nothing to follow; still emit what was found on this page.
                  data['synopsis'] = None
                  yield data
                  continue

              yield scrapy.Request(
                  response.urljoin(anime_url),
                  callback=self.parse_synopsis,
                  meta={'data': data},
              )

          # Pagination is currently disabled; kept for reference.
          # NEXT_PAGE_SELECTOR = '.next ::attr(href)'
          # next_page = response.css(NEXT_PAGE_SELECTOR).extract_first()
          # if next_page:
          #     yield scrapy.Request(response.urljoin(next_page), callback=self.parse)

      def parse_synopsis(self, response):
          """Add the synopsis from an anime detail page and emit the item.

          Args:
              response: the downloaded detail page; ``response.meta['data']``
                  holds the dict built by ``parse`` for this entry.

          Yields:
              The same dict with ``synopsis`` set to the content of the page's
              ``og:description`` meta tag, or ``None`` if the tag is absent.
          """
          data = response.meta['data']
          data['synopsis'] = response.xpath(self.SYNOPSIS_SELECTOR).extract_first()
          yield data
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement