Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import scrapy
class MAL(scrapy.Spider):
    """Scrape name, score and synopsis for entries on MyAnimeList's top-anime page.

    The ranking page only carries the name and score of each entry, so the
    synopsis is fetched from the entry's own detail page in a second request.
    """

    name = "mal_spider"
    start_urls = ["https://myanimelist.net/topanime.php"]
    # Crawl politely: enable AutoThrottle and keep a 5 second download delay.
    custom_settings = {
        'AUTOTHROTTLE_ENABLED': True,
        'DOWNLOAD_DELAY': 5.0,
    }

    def parse(self, response):
        """Parse the ranking page and schedule one detail request per entry.

        For every ``.ranking-list`` row a partially filled item (``name`` and
        ``score``) is attached to a request for that row's detail page via
        ``meta``; ``parse_anime`` completes and yields it. A row without a
        link is yielded immediately with ``synopsis`` set to ``None`` so that
        no entry is silently dropped.
        """
        # Selectors are loop-invariant, so define them once up front.
        anime_selector = '.ranking-list'
        name_selector = 'div a::text'
        score_selector = 'div span::text'
        anime_url_selector = 'div a::attr(href)'

        for anime in response.css(anime_selector):
            data = {
                'name': anime.css(name_selector).extract_first(),
                'score': anime.css(score_selector).extract_first(),
                # 'url': intentionally left out of the emitted item.
                'synopsis': None,
            }

            # Use this row's own link (not every link on the page) to reach
            # the detail page that actually holds the synopsis.
            anime_url = anime.css(anime_url_selector).extract_first()
            if not anime_url:
                yield data
                continue

            yield scrapy.Request(
                response.urljoin(anime_url),
                callback=self.parse_anime,
                meta={'item': data},
            )

        # NOTE: pagination is deliberately disabled; re-enable to crawl past
        # the first ranking page.
        # NEXT_PAGE_SELECTOR = '.next ::attr(href)'
        # next_page = response.css(NEXT_PAGE_SELECTOR).extract_first()
        # if next_page:
        #     yield scrapy.Request(response.urljoin(next_page), callback=self.parse)

    def parse_anime(self, response):
        """Fill in the synopsis from a detail page and yield the finished item.

        The synopsis is read from the page's ``og:description`` meta tag; it
        is ``None`` when the tag is absent.
        """
        synopsis_selector = '//meta[@property="og:description"]/@content'
        item = response.meta['item']
        item['synopsis'] = response.xpath(synopsis_selector).extract_first()
        yield item
Advertisement
Add Comment
Please sign in to add a comment.
Advertisement