Guest User

Untitled

a guest
Sep 7th, 2020
114
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.94 KB | None | 0 0
  # Scrape the image URLs shown on the first page of a Pixabay search for
  # "office" and print each one, followed by the total count.
  #
  # Requires the third-party packages `requests`, `beautifulsoup4`, and `lxml`
  # (the latter is the parser backend named in the BeautifulSoup call below).
  import requests
  from bs4 import BeautifulSoup

  url = 'https://pixabay.com/'
  target_url = 'https://pixabay.com/images/search/office/'

  # Seconds to wait for the server before giving up. Without a timeout,
  # requests waits forever on a stalled connection.
  REQUEST_TIMEOUT = 10

  # Browser-like request headers sent with every request in the session.
  headers = {
      'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36',
      "Accept-Language": "en-US;q=0.7,en;q=0.3",
      "Cache-Control": "no-cache",
  }

  with requests.Session() as s:
      s.headers.update(headers)
      # The home page is requested before the search page.
      # NOTE(review): presumably this lets the session pick up cookies the
      # search page expects -- confirm against the site's behaviour.
      r = s.get(url, timeout=REQUEST_TIMEOUT)
      print(r.status_code)
      r = s.get(target_url, timeout=REQUEST_TIMEOUT)
      print(r.status_code)

  results = []

  soup = BeautifulSoup(r.text, "lxml")
  for item in soup.select(".search_results a > img[src]"):
      src = item.get("src")
      if src and 'blank.gif' not in src:
          # `src` already holds a real image URL.
          print('src:', src)
          results.append(src)
          continue

      # `src` is empty or a blank.gif placeholder; fall back to the
      # `data-lazy` attribute for the URL.
      lazy_src = item.get("data-lazy")
      if not lazy_src:
          # Neither attribute holds a usable URL; skip the image rather than
          # storing None/"" and inflating the count printed below.
          continue
      print('data-lazy:', lazy_src)
      results.append(lazy_src)

  print('len:', len(results))
Add Comment
Please, Sign In to add comment