Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <script type="application/ld+json">
- {
- 'data I want to extract'
- }
- </script>
- import scrapy
- import json
class OpenriceSpider(scrapy.Spider):
    """Fetch one OpenRice restaurant page and extract its JSON-LD data.

    The page embeds structured data in one or more
    ``<script type="application/ld+json">`` elements; ``parse`` decodes
    each of them and yields the resulting objects as items.
    """

    name = 'openrice'
    allowed_domains = ['www.openrice.com']

    def start_requests(self):
        """Yield the single restaurant-page request with browser-like headers."""
        # NOTE(review): advertising 'sdch' and 'br' asks the server for
        # encodings Scrapy may not be able to decode unless the matching
        # optional packages are installed -- confirm, or drop this header
        # and let Scrapy's compression middleware set it.
        headers = {
            'accept-encoding': 'gzip, deflate, sdch, br',
            'accept-language': 'en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4',
            'upgrade-insecure-requests': '1',
            'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'cache-control': 'max-age=0',
        }
        url = 'https://www.openrice.com/en/hongkong/r-kitchen-one-cafe-sha-tin-western-r483821'
        yield scrapy.Request(url=url, headers=headers, callback=self.parse)

    def parse(self, response):
        """Yield every JSON-LD object embedded in the downloaded page.

        ``response`` is the downloaded page (a Scrapy response object), not
        the request URL and not a string, so it cannot be handed to
        ``json.loads`` directly. The JSON text has to be selected out of the
        ``<script type="application/ld+json">`` element(s) first.

        Yields:
            dict: one item per JSON object found. A top-level JSON array is
            flattened so that each contained object is yielded separately.
        """
        raw_blocks = response.xpath(
            '//script[@type="application/ld+json"]/text()'
        ).extract()
        if not raw_blocks:
            self.logger.warning('No ld+json script found on %s', response.url)
            return

        for raw in raw_blocks:
            try:
                decoded = json.loads(raw)
            except ValueError:
                # json's decode error is a ValueError subclass; skip the bad
                # block rather than abort the whole page.
                self.logger.warning('Malformed ld+json block on %s', response.url)
                continue

            # A callback may only yield items/requests, so unwrap arrays.
            candidates = decoded if isinstance(decoded, list) else [decoded]
            for entry in candidates:
                if isinstance(entry, dict):
                    yield entry
Add Comment
Please, Sign In to add comment