furas

Python - requests - into-action.us (Stackoverflow)

Sep 24th, 2025 (edited)
153
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.17 KB | None | 0 0
  1. # date: 2025.09.24
  2.  
  3. # [python - Trying to scrape <a href="/media/"> from a webpage and nothing is working - Stack Overflow](https://stackoverflow.com/questions/79773927/trying-to-scrape-a-href-media-from-a-webpage-and-nothing-is-working)
  4.  
  5. import requests
  6.  
  7. session = requests.Session()
  8.  
  9. # load main page to get cookies
  10. url = "https://library.into-action.us/collection/creative-jam-health-is-life"
  11. response = session.get(url)
  12.  
  13. # load data
  14. url = "https://library.into-action.us/api/v1/collections/creative-jam-health-is-life/content/"
  15.  
  16. page = 1
  17. index = 1
  18. while True:
  19.     print(f"--- {page} ---")
  20.  
  21.     response = session.get(url)
  22.  
  23.     data = response.json()
  24.  
  25.     for item in data["results"]:
  26.         # display information
  27.         print(f'{index:4} | {item["id"]:6} | {item["static_image"]}')
  28.  
  29.         # download image if exists
  30.         image_url = item["static_image"]
  31.         if image_url:
  32.             # get filename without parameters and without full path
  33.             filename = image_url.rsplit("?")[0].rsplit("/")[-1]
  34.             # download file
  35.             response = session.get(image_url)
  36.             # write in file with ID at the beginning of filename
  37.             with open(f'{item["id"]:06}_{filename}', "wb") as f:
  38.                 f.write(response.content)
  39.  
  40.         index += 1
  41.  
  42.     # if there is no link to next page then exit loop
  43.     if ("next" not in data) or (not data["next"]):
  44.         break
  45.  
  46.     # get link to next page - to use it in next loop.
  47.     url = data["next"]
  48.     page += 1
  49.  
  50. """
  51. Output: (it also downloads images)
  52.  
  53. --- 1 ---
  54.   1 |  29821 | https://into-action-green-static-files.s3.amazonaws.com/content/2025/06/02/RWJF_CJ_AnnickMartin.jpg
  55.   2 |  29355 | https://media1.giphy.com/media/v1.Y2lkPTc5ODAyNjliaGg5dnpmMXlya2E2cndwd3JvZjU5NHF0bmt3ODV6YWd5amxpenh6cSZlcD12MV9naWZzX2dpZklkJmN0PWc/OTkwczXwJrxq1aVfFV/giphy_s.gif
  56.   3 |  29884 | https://media0.giphy.com/media/v1.Y2lkPTc5ODAyNjliMDRjaHRxNjJiMGhhbTM5MWpyMnk3MWpiMnBqZGxqODV2NW9hdXA1bCZlcD12MV9naWZzX2dpZklkJmN0PWc/VIJeYkKhOC0ifOCfSa/giphy_s.gif
  57.   4 |  29840 | https://media3.giphy.com/media/v1.Y2lkPTc5ODAyNjliZGJmN3phNDk2dHNlbDNmcHRpbmhxdjU1ZjA5amx3ZWk1NWlzdzlhbiZlcD12MV9naWZzX2dpZklkJmN0PWc/niFwnBm5mg5azbfV5c/giphy_s.gif
  58.   5 |  29817 | https://into-action-green-static-files.s3.amazonaws.com/content/2025/06/02/RWJF_CJ_AlexRosales-GenevaPierre-LincolnHarrison_1.png
  59.   6 |  29850 | https://into-action-green-static-files.s3.amazonaws.com/content/2025/06/02/RWJF_CJ_ProjectsPetite.png
  60.   7 |  29353 | https://media3.giphy.com/media/v1.Y2lkPTc5ODAyNjliajM2cmpkajkxYWp2czh6dWtzZ29ybnFra2lybncyaXUxOWQ5cGF5NCZlcD12MV9naWZzX2dpZklkJmN0PWc/DKa0XWo3PfSyuFnZSF/giphy_s.gif
  61.   8 |  29847 | https://into-action-green-static-files.s3.amazonaws.com/content/2025/06/02/RWJF_CJ_Melonsharks.png
  62.   9 |  29860 | https://into-action-green-static-files.s3.amazonaws.com/content/2025/06/02/RWJF_CJ_Yaicecream.png
  63.  10 |  29830 | https://media4.giphy.com/media/v1.Y2lkPTc5ODAyNjliYXB0cGUwdXFxdTI0MjRqcW1yZnJhNWg5MmYxZnMwNjRjMjc5M3hsMiZlcD12MV9naWZzX2dpZklkJmN0PWc/2BECd3EF7VpMmlHiuE/giphy_s.gif
  64.  11 |  29846 | https://into-action-green-static-files.s3.amazonaws.com/content/2025/06/02/RWJF_CJ_MeghanAuer.png
  65. """
Advertisement
Add Comment
Please, Sign In to add comment