Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import scrapy
- from scrapy.spiders import SitemapSpider
- from parsel import Selector
class DigikeySsylkiFromSitemapSpider(SitemapSpider):
    """Spider that extracts the URLs listed in Digi-Key sitemap documents.

    Crawling starts from the sitemap in ``sitemap_urls``. Entries whose URL
    matches the ``/submap/`` pattern in ``sitemap_rules`` are downloaded and
    handed to :meth:`parse`.
    """

    name = 'digikey-ssylki-from-sitemap'
    sitemap_urls = ['https://www.digikey.com/product-detail/sitemap.xml']
    sitemap_rules = [('/submap/', 'parse')]
    custom_settings = {
        'CONCURRENT_REQUESTS': 64,
        'DOWNLOAD_DELAY': 1,
    }

    def parse(self, response):
        """Yield one item per ``<loc>`` element found in the response body.

        Args:
            response: The downloaded response; its ``text`` is parsed as XML.

        Yields:
            dict: ``{'url': <loc text>}`` for every non-empty ``<loc>`` entry.
        """
        body = Selector(text=response.text, type='xml')
        # In XML mode the sitemap's default namespace would stop a bare
        # ``//loc`` from matching, so strip namespaces before querying.
        body.remove_namespaces()
        # ``getall()`` on the text nodes returns every URL string, not just
        # the first element's markup.
        for loc in body.xpath('//loc/text()').getall():
            url = loc.strip()
            if url:
                yield {'url': url}
Add Comment
Please sign in to add a comment.