Guest User

Wallpaper Scraper

a guest
Jun 23rd, 2020
317
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.57 KB | None | 0 0
import os
import sys
from urllib.error import URLError
from urllib.parse import quote
from urllib.request import Request, urlopen

import praw
import requests
from bs4 import BeautifulSoup

  9. os.chdir("/Users/student/Desktop/Background_Images")
  10. print("Navigated to directory...")
  11.  
  12. SCRIPT = "HIDDEN"
  13. SECRET = "HIDDEN"
  14. NUM_IMAGES = 100
  15.  
  16. reddit = praw.Reddit(
  17.     client_id=SCRIPT,
  18.     client_secret=SECRET,
  19.     user_agent="ImageBot",
  20.     username="LAcuber",
  21.     password="HIDDEN",
  22. )
  23.  
  24. subreddit = reddit.subreddit("wallpaper")
  25.  
  26. links = []
  27.  
  28. for submission in subreddit.hot(limit=NUM_IMAGES):
  29.     links.append("https://reddit.com" + submission.permalink)
  30.  
  31.  
  32. images = []
  33. for i in range(len(links)):
  34.     sys.stdout.write(f"\rProcessing image {i+1} of {NUM_IMAGES}.")
  35.  
  36.     link_decoded = links[i][:21] + quote(links[i][21:])
  37.  
  38.     req = Request(link_decoded, headers={"User-Agent": "Mozilla/5.0"})
  39.    
  40.     html_page = urlopen(req)
  41.  
  42.     soup = BeautifulSoup(html_page, "lxml")
  43.  
  44.     for link in soup.findAll("a"):
  45.  
  46.         if (
  47.             "https://i.redd.it/" in str(link.get("href"))[0:18]
  48.             and str(link.get("href")) not in images
  49.         ):
  50.             images.append(link.get("href"))
  51.  
  52. for i in range(len(images)):  # todo -> check image size, if under 200 KB
  53.     sys.stdout.write(f"\rWriting image {i+1} of {len(images)}.")
  54.     img_data = requests.get(images[i]).content
  55.     with open(f"image_{i+1}.jpg", "wb") as handler:
  56.         handler.write(img_data)
  57.         if os.stat(f"image_{i+1}.jpg").st_size < 200000:  # under 200 KB, too fuzzy
  58.             os.remove(f"image_{i+1}.jpg")
Add Comment
Please, Sign In to add comment