Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Python script to download every book from Standard Ebooks.
# Requires `wget` on PATH. Works on Linux, should work on macOS, may break on Windows.
import os
import pdb
import subprocess
from urllib.parse import urljoin

import requests as req
from bs4 import BeautifulSoup
BASE_URL = "https://standardebooks.org"

# Inclusive range of catalogue listing pages to crawl.
# NOTE(review): 36 is presumably the number of listing pages that existed when
# the script was written; raise LAST_PAGE as the catalogue grows.
FIRST_PAGE = 1
LAST_PAGE = 36

# NOTE(review): both values encode assumptions about the listing page markup
# that cannot be confirmed from this file. Presumably the last 49 anchors are
# footer/navigation links and every book contributes three consecutive
# matching anchors, the first being the book page. Verify against the live
# site before changing either value.
TRAILING_LINKS = 49
LINKS_PER_BOOK = 3

# Seconds to wait for the server before giving up on a single HTTP request.
REQUEST_TIMEOUT = 30


def fetch_hrefs(url):
    """Return the ``href`` of every ``<a>`` element on the page at *url*.

    Anchors without an ``href`` attribute yield ``None`` entries, so callers
    must be prepared to filter non-string values.

    Raises:
        requests.RequestException: on connection failure or timeout.
    """
    response = req.get(url, timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(response.text, "html.parser")
    return [anchor.get("href") for anchor in soup.find_all("a")]


def select_book_paths(hrefs):
    """Pick one book-page href per book from a listing page's anchor hrefs.

    The last ``TRAILING_LINKS`` entries are discarded, the remainder is
    narrowed to strings that contain ``"ebooks"`` and at least two slashes,
    and every ``LINKS_PER_BOOK``-th survivor is returned. A list shorter than
    ``TRAILING_LINKS`` therefore yields no books.
    """
    keep = max(len(hrefs) - TRAILING_LINKS, 0)
    candidates = [
        href
        for href in hrefs[:keep]
        if isinstance(href, str) and "ebooks" in href and href.count("/") >= 2
    ]
    return candidates[::LINKS_PER_BOOK]


def find_epub_href(hrefs):
    """Return the first href containing ``".epub"``, or ``None`` if absent.

    Non-string entries (anchors without an ``href``) are ignored rather than
    raising ``TypeError``, which previously caused books to be skipped.
    """
    for href in hrefs:
        if isinstance(href, str) and ".epub" in href:
            return href
    return None


def download(url):
    """Download *url* into the current directory with ``wget --no-clobber``.

    The command is passed as an argument list, never through a shell, so a
    scraped URL cannot inject shell syntax. wget's exit status is ignored,
    matching the best-effort behaviour of the original script.

    Raises:
        FileNotFoundError: if ``wget`` is not installed / not on PATH.
    """
    subprocess.run(["wget", "--no-clobber", url], check=False)


def main(first_page=FIRST_PAGE, last_page=LAST_PAGE):
    """Crawl listing pages *first_page*..*last_page* and fetch each EPUB.

    For every book found on a listing page, the book's own page is fetched,
    its first ``.epub`` link is located and handed to :func:`download`.
    Books without an EPUB link are skipped; the rest of the page is still
    processed.
    """
    for page in range(first_page, last_page + 1):
        listing_url = "{}/ebooks?page={}".format(BASE_URL, page)
        for book_path in select_book_paths(fetch_hrefs(listing_url)):
            # urljoin handles both site-relative and absolute hrefs.
            book_hrefs = fetch_hrefs(urljoin(BASE_URL, book_path))
            epub_href = find_epub_href(book_hrefs)
            if epub_href is None:
                # Skip only this book; do not abandon the listing page.
                continue
            download(urljoin(BASE_URL, epub_href))


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement