Advertisement
dereksir

Untitled

Aug 16th, 2023 (edited)
101
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.98 KB | None | 0 0
  1. import scrapy
  2. from urllib.parse import urlencode
  3.  
  4. def get_zenrows_api_url(url, api_key):
  5.     # Define the necessary parameters
  6.     payload = {
  7.         'url': url,
  8.         'js_render': 'true',
  9.         'js_instructions': '[{"wait": 500}]',
  10.         'premium_proxy': 'true',
  11.         'antibot': 'true',            
  12.     }
  13.    
  14.     # Construct the API URL
  15.     api_url = f'https://api.zenrows.com/v1/?apikey={api_key}&{urlencode(payload)}'
  16.     return api_url
  17.  
  18.  
  19. class TestSpider(scrapy.Spider):
  20.     name = 'test'
  21.  
  22.     def start_requests(self):
  23.         urls = [
  24.             'https://angular.io/docs',
  25.         ]
  26.         api_key = 'Your_API_Key'
  27.         for url in urls:
  28.             # make a GET request using the ZenRows API URL
  29.             api_url = get_zenrows_api_url(url, api_key)
  30.             yield scrapy.Request(api_url, callback=self.parse)
  31.  
  32.     def parse(self, response):
  33.         # Extract and print the title tag
  34.         title = response.css('title::text').get()
  35.         yield {'title': title}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement