Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# date: 2025.09.24
# [python - Trying to scrape <a href="/media/"> from a webpage and nothing is working - Stack Overflow](https://stackoverflow.com/questions/79773927/trying-to-scrape-a-href-media-from-a-webpage-and-nothing-is-working)
"""Download the static images of one Into Action library collection.

The script loads the public collection page first (so the session receives
the site's cookies), then walks the paginated JSON API.  For every item it
prints one line and, when the item has a ``static_image`` URL, saves that
image into the current working directory as ``<zero-padded id>_<filename>``.

Output: (it also downloads images)

--- 1 ---
   1 |  29821 | https://into-action-green-static-files.s3.amazonaws.com/content/2025/06/02/RWJF_CJ_AnnickMartin.jpg
   2 |  29355 | https://media1.giphy.com/media/v1.Y2lkPTc5ODAyNjliaGg5dnpmMXlya2E2cndwd3JvZjU5NHF0bmt3ODV6YWd5amxpenh6cSZlcD12MV9naWZzX2dpZklkJmN0PWc/OTkwczXwJrxq1aVfFV/giphy_s.gif
   3 |  29884 | https://media0.giphy.com/media/v1.Y2lkPTc5ODAyNjliMDRjaHRxNjJiMGhhbTM5MWpyMnk3MWpiMnBqZGxqODV2NW9hdXA1bCZlcD12MV9naWZzX2dpZklkJmN0PWc/VIJeYkKhOC0ifOCfSa/giphy_s.gif
   4 |  29840 | https://media3.giphy.com/media/v1.Y2lkPTc5ODAyNjliZGJmN3phNDk2dHNlbDNmcHRpbmhxdjU1ZjA5amx3ZWk1NWlzdzlhbiZlcD12MV9naWZzX2dpZklkJmN0PWc/niFwnBm5mg5azbfV5c/giphy_s.gif
   5 |  29817 | https://into-action-green-static-files.s3.amazonaws.com/content/2025/06/02/RWJF_CJ_AlexRosales-GenevaPierre-LincolnHarrison_1.png
   6 |  29850 | https://into-action-green-static-files.s3.amazonaws.com/content/2025/06/02/RWJF_CJ_ProjectsPetite.png
   7 |  29353 | https://media3.giphy.com/media/v1.Y2lkPTc5ODAyNjliajM2cmpkajkxYWp2czh6dWtzZ29ybnFra2lybncyaXUxOWQ5cGF5NCZlcD12MV9naWZzX2dpZklkJmN0PWc/DKa0XWo3PfSyuFnZSF/giphy_s.gif
   8 |  29847 | https://into-action-green-static-files.s3.amazonaws.com/content/2025/06/02/RWJF_CJ_Melonsharks.png
   9 |  29860 | https://into-action-green-static-files.s3.amazonaws.com/content/2025/06/02/RWJF_CJ_Yaicecream.png
  10 |  29830 | https://media4.giphy.com/media/v1.Y2lkPTc5ODAyNjliYXB0cGUwdXFxdTI0MjRqcW1yZnJhNWg5MmYxZnMwNjRjMjc5M3hsMiZlcD12MV9naWZzX2dpZklkJmN0PWc/2BECd3EF7VpMmlHiuE/giphy_s.gif
  11 |  29846 | https://into-action-green-static-files.s3.amazonaws.com/content/2025/06/02/RWJF_CJ_MeghanAuer.png
"""

import sys
from urllib.parse import urlsplit

# Public page of the collection; requested once so the session gets cookies.
COLLECTION_PAGE_URL = "https://library.into-action.us/collection/creative-jam-health-is-life"
# First page of the JSON listing; later pages come from each page's "next".
COLLECTION_API_URL = "https://library.into-action.us/api/v1/collections/creative-jam-health-is-life/content/"

# requests waits forever unless a timeout is given, so every call passes one.
REQUEST_TIMEOUT = 30  # seconds
# Used when a URL's path ends in "/" and therefore has no file name.
FALLBACK_FILENAME = "image"


def filename_from_url(image_url):
    """Return the last path segment of *image_url*.

    The query string and fragment are ignored.  If the path has no final
    segment (for example it ends in "/"), ``FALLBACK_FILENAME`` is returned
    so the caller never ends up with an empty name.
    """
    name = urlsplit(image_url).path.rsplit("/", 1)[-1]
    return name or FALLBACK_FILENAME


def local_filename(item_id, image_url):
    """Return the output file name: the id zero-padded to 6 digits, "_", name."""
    return f"{item_id:06}_{filename_from_url(image_url)}"


def download_image(session, image_url, path):
    """Fetch *image_url* with *session* and write the body to *path*.

    Returns True when the file was written.  On a non-success HTTP status
    nothing is written (so an error page is never saved as an image), a
    message goes to stderr and False is returned.  Connection-level errors
    raised by the session propagate to the caller.
    """
    image_response = session.get(image_url, timeout=REQUEST_TIMEOUT)
    if not image_response.ok:
        print(
            f"skipped {image_url}: HTTP {image_response.status_code}",
            file=sys.stderr,
        )
        return False
    with open(path, "wb") as f:
        f.write(image_response.content)
    return True


def download_collection(session, api_url=COLLECTION_API_URL):
    """Walk every page of the listing at *api_url* and download the images.

    *session* is a ``requests.Session``-like object.  One line per item is
    printed to stdout; items without a ``static_image`` are listed but not
    downloaded.  Raises the session's HTTP error if a listing page itself
    cannot be fetched, since there is nothing to iterate without it.
    """
    page = 1
    index = 1
    url = api_url
    while url:
        print(f"--- {page} ---")
        page_response = session.get(url, timeout=REQUEST_TIMEOUT)
        page_response.raise_for_status()
        data = page_response.json()

        for item in data["results"]:
            image_url = item["static_image"]
            # display information
            print(f'{index:4} | {item["id"]:6} | {image_url}')
            # download image if exists
            if image_url:
                download_image(
                    session, image_url, local_filename(item["id"], image_url)
                )
            # the running number counts every listed item, with or without image
            index += 1

        # a missing or empty "next" link ends the loop
        url = data.get("next")
        page += 1


def main():
    """Open a session, pick up the site's cookies and download the collection."""
    # Imported here so the helpers above stay importable without the
    # third-party package installed.
    import requests

    with requests.Session() as session:
        # load main page to get cookies; the response body itself is not used
        session.get(COLLECTION_PAGE_URL, timeout=REQUEST_TIMEOUT)
        download_collection(session)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment