Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- Download middleware:
def process_request(self, request, spider):
    """Load ``request.url`` through ``browser_api`` and wrap the result.

    The URL is opened with ``browser_api.open_page``, then
    ``browser_api.wait_for_page_to_be_loaded`` is called before the
    browser's current URL and page HTML are read back.

    NOTE(review): assuming ``HtmlResponse`` is Scrapy's, returning a
    response from ``process_request`` makes Scrapy skip its own download
    for this request -- confirm against the downloader middleware docs.

    Args:
        request: Request being processed; only ``request.url`` is read here.
        spider: Spider that issued the request (unused in this method).

    Returns:
        An ``HtmlResponse`` built from the browser's current URL and page
        HTML, declared as UTF-8 and attached to ``request``.
    """
    browser_api.open_page(request.url)
    browser_api.wait_for_page_to_be_loaded()

    # Read the URL first, then the HTML, matching the original call order.
    rendered_url = browser_api.current_url()
    rendered_html = browser_api.get_page_html()

    return HtmlResponse(
        rendered_url,
        body=rendered_html,
        encoding='utf-8',
        request=request,
    )
- Spider:
class SomeSpider(CrawlSpider):
    """Crawl ``some_site.com`` by following the links selected in ``rules``.

    Requests are routed through ``middlewares_custom.MyCustomMiddleware``,
    enabled per spider via ``custom_settings`` at priority 900.
    """

    name = "some site"
    allowed_domains = ['some_site.com']
    start_urls = ['https://some_site.com/']
    custom_settings = {
        'DOWNLOADER_MIDDLEWARES': {
            'middlewares_custom.MyCustomMiddleware': 900,
        },
    }

    # TODO: the real patterns were elided in the original snippet, which left
    # bare ``allow_urls =`` / ``deny_urls =`` assignments (a SyntaxError).
    # Empty tuples keep the class importable; per the LinkExtractor docs an
    # empty ``allow`` matches every link and an empty ``deny`` excludes none.
    allow_urls = ()
    deny_urls = ()

    rules = (
        # Links matching '/something' are passed to ``parse_item`` and also
        # followed further.
        Rule(
            LinkExtractor(allow='/something'),
            callback='parse_item',
            follow=True,
        ),
        # Everything else allowed by the patterns above; no callback is set.
        Rule(LinkExtractor(allow=allow_urls, deny=deny_urls)),
    )

    def parse_item(self, response):
        """Handle a response for a link matched by the '/something' rule.

        Args:
            response: Response handed to the callback (not inspected here).

        Returns:
            An empty list; no items are produced yet.
        """
        print('I am never called :(')
        return []
Add Comment
Please sign in to add a comment.