Advertisement
nathan_markerio

Download all images from blog post

Nov 14th, 2023
132
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.31 KB | None | 0 0
  1. import requests
  2. import os
  3.  
  4. from bs4 import BeautifulSoup
  5.  
  6. BLOG_POST_URL = 'ADD_URL_HERE'
  7. OUTPUT_DIR = './downloaded_images/'
  8.  
  9. if not os.path.exists(OUTPUT_DIR):
  10.     os.makedirs(OUTPUT_DIR)
  11.  
  12.  
  13. def download_images_from_post(post_url):
  14.     response = requests.get(post_url)
  15.     soup = BeautifulSoup(response.text, 'html.parser')
  16.  
  17.     no_alt_counter = 0  # Counter for images with no alt text
  18.  
  19.     for img in soup.select('#articlecontent img'):
  20.         img_url = img['src']
  21.  
  22.         # Skip GIFs
  23.         if img_url.endswith('.gif'):
  24.             continue
  25.  
  26.         alt_text = img.get('alt', '')  # Get alt text, or empty string if not present
  27.  
  28.         # If no alt text is provided, use a default name
  29.         if not alt_text:
  30.             no_alt_counter += 1
  31.             alt_text = f"NO_ALT_PROVIDED_{no_alt_counter}"
  32.  
  33.         # Download image
  34.         img_name = os.path.basename(img_url)
  35.         img_data = requests.get(img_url).content
  36.         img_path = os.path.join(OUTPUT_DIR, img_name)
  37.  
  38.         with open(img_path, 'wb') as img_file:
  39.             img_file.write(img_data)
  40.  
  41.         # Save alt text to a .txt file with the same name as the image
  42.         alt_text_path = os.path.join(OUTPUT_DIR, alt_text + '.txt')
  43.         with open(alt_text_path, 'w') as text_file:
  44.             text_file.write(alt_text)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement