Advertisement
Not a member of Pastebin yet? Sign up; it unlocks many cool features!
import os
from urllib.parse import urljoin, urlparse

import requests
from bs4 import BeautifulSoup
# URL of the blog post to scrape; replace the placeholder before running.
BLOG_POST_URL = 'ADD_URL_HERE'
# Directory that receives the downloaded images and their alt-text files.
OUTPUT_DIR = './downloaded_images/'

# exist_ok=True replaces the separate os.path.exists() check, which could
# race with another process creating the directory between check and create.
os.makedirs(OUTPUT_DIR, exist_ok=True)
def download_images_from_post(post_url, timeout=30):
    """Download the non-GIF images of a blog post together with their alt text.

    Every ``<img>`` matched by the CSS selector ``#articlecontent img`` is
    fetched and written to ``OUTPUT_DIR``. Next to each image a ``.txt``
    file with the same base name is written containing the image's alt
    text, or ``NO_ALT_PROVIDED_<n>`` when the tag has no alt text.

    Images that share a file name overwrite one another.

    Args:
        post_url: URL of the page to scrape. Relative image URLs are
            resolved against it.
        timeout: Seconds to wait for each HTTP request.

    Raises:
        requests.RequestException: If the page cannot be fetched (including
            an HTTP error status) or an image request fails at the
            connection level.
    """
    response = requests.get(post_url, timeout=timeout)
    # Fail loudly instead of silently parsing an error page with no images.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    no_alt_counter = 0  # Counter for images with no alt text
    for img in soup.select('#articlecontent img'):
        # Some <img> tags carry no src attribute at all; indexing with
        # img['src'] would raise KeyError for them.
        src = (img.get('src') or '').strip()
        if not src:
            continue

        # Resolve relative and protocol-relative URLs against the post URL.
        img_url = urljoin(post_url, src)
        parsed_url = urlparse(img_url)
        # Inline "data:" images and other non-HTTP schemes cannot be fetched.
        if parsed_url.scheme not in ('http', 'https'):
            continue

        # Derive the name from the URL path only, so a query string neither
        # ends up in the file name nor hides the extension.
        img_name = os.path.basename(parsed_url.path)
        if not img_name:
            continue

        # Skip GIFs
        if img_name.lower().endswith('.gif'):
            continue

        # Download image
        img_response = requests.get(img_url, timeout=timeout)
        if not img_response.ok:
            # Do not store an HTTP error body as if it were image data.
            print(f"Skipping {img_url}: HTTP {img_response.status_code}")
            continue

        img_path = os.path.join(OUTPUT_DIR, img_name)
        with open(img_path, 'wb') as img_file:
            img_file.write(img_response.content)

        alt_text = img.get('alt', '')  # Empty string if not present
        # If no alt text is provided, use a default placeholder
        if not alt_text:
            no_alt_counter += 1
            alt_text = f"NO_ALT_PROVIDED_{no_alt_counter}"

        # Save alt text to a .txt file with the same name as the image.
        # Naming the file after the alt text itself would break on alt text
        # containing path separators or other characters invalid in names.
        text_name = os.path.splitext(img_name)[0] + '.txt'
        alt_text_path = os.path.join(OUTPUT_DIR, text_name)
        with open(alt_text_path, 'w', encoding='utf-8') as text_file:
            text_file.write(alt_text)
Advertisement
Add Comment
Please sign in to add a comment.
Advertisement