Advertisement
Guest User

Untitled

a guest
Sep 21st, 2017
70
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.89 KB | None | 0 0
  1. import scrapy
  2. from scrapy_splash import SplashRequest
  3.  
  4. class MySpider(scrapy.Spider):
  5. start_urls = ["http://example.com", "http://example.com/foo"]
  6.  
  7. def start_requests(self):
  8. for url in self.start_urls:
  9. yield SplashRequest(url, self.parse,
  10. endpoint='render.html',
  11. args={'wait': 0.5},
  12. )
  13.  
  14. def parse(self, response):
  15. # response.body is a result of render.html call; it
  16. # contains HTML processed by a browser.
  17. # …
  18.  
  19. import scrapy
  20. from scrapy_splash import SplashRequest
  21.  
  22. class MySpider(scrapy.Spider):
  23. first_page_url = "http://example.com"
  24.  
  25. def get_page_contents(self):
  26. # fetch first page contents
  27. page_contents = extract_content(response)
  28.  
  29. # Select the select control from page
  30. for element in selected_control:
  31. click_element(element, extract_content)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement