Advertisement
Guest User

Standard Ebooks Scraper

a guest
Jan 15th, 2021
187
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.03 KB | None | 0 0
  # Python script to download every book from Standard Ebooks.
  # Shells out to the `wget` executable, which must be on PATH.
  # Will work for Linux, should work for Mac, might break for Windows.

  from bs4 import BeautifulSoup
  import requests as req
  import os
  import pdb
  import subprocess

  BASE_URL = "https://standardebooks.org"

  # Catalogue listing pages FIRST_PAGE..LAST_PAGE (inclusive) are crawled.
  FIRST_PAGE = 1
  LAST_PAGE = 36

  # The last N anchors of a listing page are ignored when looking for books.
  # NOTE(review): presumably these are footer/navigation links -- confirm
  # against the current site layout.
  TRAILING_LINKS_TO_SKIP = 49

  # Only every Nth matching anchor is kept.
  # NOTE(review): presumably each listing entry links to its book three
  # times -- confirm against the current site layout.
  LINKS_PER_BOOK = 3


  def get_hrefs(html):
      """Return the ``href`` of every ``<a>`` tag in *html*.

      Anchors without an ``href`` attribute contribute ``None``, so callers
      must filter before doing string operations on the entries.
      """
      soup = BeautifulSoup(html, "html.parser")
      return [anchor.get("href") for anchor in soup.find_all("a")]


  def select_book_paths(hrefs):
      """Pick the book page paths out of the hrefs of one listing page.

      Drops the trailing ``TRAILING_LINKS_TO_SKIP`` entries, keeps string
      hrefs that contain "ebooks" and at least two slashes, then takes every
      ``LINKS_PER_BOOK``-th survivor.
      """
      candidates = [
          href
          for href in hrefs[:-TRAILING_LINKS_TO_SKIP]
          if isinstance(href, str) and "ebooks" in href and href.count("/") >= 2
      ]
      return candidates[::LINKS_PER_BOOK]


  def find_epub_path(hrefs):
      """Return the first href containing ".epub", or ``None`` if there is none.

      Non-string entries (anchors without an href) are skipped instead of
      raising, so a single bare ``<a>`` tag cannot hide a valid epub link.
      """
      return next(
          (href for href in hrefs if isinstance(href, str) and ".epub" in href),
          None,
      )


  def download_epub(url):
      """Download *url* into the current directory with ``wget --no-clobber``.

      The command is passed as an argument list, so text scraped from the
      page is never interpreted by a shell. wget's exit status is ignored,
      keeping the crawl best-effort; a missing wget executable raises
      ``FileNotFoundError`` instead of failing silently for every book.
      """
      subprocess.run(["wget", "--no-clobber", url])


  def main(first_page=FIRST_PAGE, last_page=LAST_PAGE):
      """Crawl listing pages *first_page*..*last_page* and download each epub."""
      for page_number in range(first_page, last_page + 1):
          listing = req.get(BASE_URL + "/ebooks?page=" + str(page_number))

          for book_path in select_book_paths(get_hrefs(listing.text)):
              book_page = req.get(BASE_URL + book_path)
              epub_path = find_epub_path(get_hrefs(book_page.text))

              if epub_path is None:
                  # Skip only this book; the rest of the page is still
                  # worth processing.
                  continue

              download_epub(BASE_URL + epub_path)


  if __name__ == "__main__":
      main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement