Guest User

Untitled

a guest
Nov 21st, 2017
81
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.02 KB | None | 0 0
  1. <script type="application/ld+json">
  2. {
  3. 'data I want to extract'
  4. }
  5. </script>
  6.  
  7. import scrapy
  8. import json
  9. class OpenriceSpider(scrapy.Spider):
  10. name = 'openrice'
  11. allowed_domains = ['www.openrice.com']
  12.  
  13. def start_requests(self):
  14. headers = {
  15. 'accept-encoding': 'gzip, deflate, sdch, br',
  16. 'accept-language': 'en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4',
  17. 'upgrade-insecure-requests': '1',
  18. 'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
  19. 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
  20. 'cache-control': 'max-age=0',
  21. }
  22. url = 'https://www.openrice.com/en/hongkong/r-kitchen-one-cafe-sha-tin-western-r483821'
  23. yield scrapy.Request(url=url, headers=headers, callback=self.parse)
  24.  
  25. def parse(self, response): # response = request url ?
  26. items = []
  27. jsonresponse = json.loads(response)
Add Comment
Please, Sign In to add comment