Advertisement
Foxtrod89

seleniumwire_playground

Dec 12th, 2023
855
0
Never
1
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.56 KB | Source Code | 0 0
  1. from seleniumwire.utils import decode as decodesw
  2. from seleniumwire import webdriver
  3. from urllib.parse import unquote
  4. import json
  5.  
  6. def show_requests_url(driver,target_url):
  7.     driver.get(target_url)
  8.     urls = []
  9.     for request in driver.requests:
  10.         urls.append({"url":request.url})
  11.     return urls
  12.  
  13. def show_responses(driver, target_url):
  14.     driver.get(target_url)
  15.     responses = []
  16.     for request in driver.requests:
  17.         try:
  18.             data = decodesw(
  19.                 request.response.body,
  20.                 request.response.headers.get('Content-Encoding', 'identity')
  21.             )
  22.             resp = json.loads(data.decode("utf-8"))
  23.             responses.append(resp)
  24.         except:
  25.             pass
  26.     return responses
  27.  
  28.  
  29. def main():
  30.     keywords = ['https://www.google-analytics.com']
  31.     # uncomment this if you want full responses
  32.     # options = webdriver.FirefoxOptions()
  33.     options = webdriver.ChromeOptions()
  34.     options.add_argument("--headless")
  35.     driver = webdriver.Chrome(seleniumwire_options={"disable_encoding":True},options=options)
  36.     target_url = "https://pergikuliner.com/restaurants?page=1"
  37.     # target_url = "https://www.google.com"
  38.     urls = show_requests_url(driver,target_url)
  39.     responses = show_responses(driver,target_url)
  40.    
  41.     for url in urls:
  42.         for kw in keywords:
  43.             if kw in url['url']:
  44.                 print(unquote(url['url']))
  45.     with open('json_responses.json','w') as fo:
  46.         json.dump(responses,fo)
  47.  
  48.     driver.close()
  49.  
  50. if __name__ == "__main__":
  51.     main()
  52.  
Tags: web scraping
Advertisement
Comments
  • Foxtrod89
    1 year (edited)
    # text 1.35 KB | 0 0
    1. Firefox is able to capture more responses for some reason
    2. example:
    3. [
    4. {
    5. "country_code": "US",
    6. "country_name": "United States"
    7. },
    8. {
    9. "metadata": {},
    10. "timestamp": 1702426203007,
    11. "changes": [
    12. {
    13. "id": "3ddbe540-6900-30ef-7853-199a3b4bc6df",
    14. "last_modified": 1702072014351,
    15. "bucket": "main",
    16. "collection": "quicksuggest",
    17. "host": "firefox.settings.services.mozilla.com"
    18. }
    19. ]
    20. },
    21. {
    22. "metadata": {
    23. "schema": {
    24. "type": "object",
    25. "properties": {
    26. "id": {
    27. "type": "string"
    28. }
    29. }
    30. },
    31. "signature": {
    32. "ref": "20uju743097rl2aarsb7lrgodj",
    33. "x5u": "https://content-signature-2.cdn.mozilla.net/chains/remote-settings.content-signature.mozilla.org-2024-01-19-16-42-21.chain",
    34.  
    35. Chrome response:
    36.  
    37. [{}, {"place": [{"city": "jakarta", "places": ["Green Ville", "Kebon Jeruk", "Kedoya", "Jelambar", "Daan Mogot", "Kemanggisan", "Mangga Besar", "Tanjung Duren", "Bendungan Hilir", "Gatot Subroto", "Cempaka Putih", "Cikini", "Salemba", "Menteng", "Gajah Mada", "Gambir", "Blok M", "Melawai", "Kebayoran Baru", "Cilan
Add Comment
Please, Sign In to add comment
Advertisement