Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # I needed a PDF from AZ-Delivery, but could not find a search function on the site,
- # so this script scrapes every page of the e-book collection instead.
- from collections.abc import Generator
- from itertools import count
- import requests
- from bs4 import BeautifulSoup
# Listing URL of the e-book collection; ``{page}`` is filled in per request.
BASE_URL = "https://www.az-delivery.de/en/collections/kostenlose-e-books?page={page}"


def get(page: int, timeout: float = 10.0) -> tuple[str, list[str]]:
    """Fetch one listing page and return its URL with the product titles found.

    Args:
        page: Page number substituted into ``BASE_URL``.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block the scrape
            indefinitely.

    Returns:
        A ``(url, titles)`` tuple, where ``titles`` holds the text of every
        ``a.product-item__title`` element on the page (empty when none match).

    Raises:
        requests.exceptions.RequestException: If the request fails or times out.
    """
    url = BASE_URL.format(page=page)
    resp = requests.get(url, timeout=timeout)
    # The HTTP status is not checked here: a response without matching
    # elements simply yields an empty title list.
    soup = BeautifulSoup(resp.content, "html.parser")
    return url, [e.text for e in soup.select("a.product-item__title")]
def walk(start: int = 1) -> Generator[tuple[str, list[str]], None, None]:
    """Yield ``(url, titles)`` for consecutive pages until one has no titles.

    Args:
        start: Page number to begin with.

    Yields:
        The tuple returned by ``get`` for each page, in ascending page order.
        Iteration ends at the first page whose title list is empty; that
        page is not yielded.
    """
    # Count from ``start`` so the parameter is honoured; a hard-coded 1
    # would silently ignore whatever the caller passed.
    for page in count(start):
        url, elements = get(page)
        if not elements:
            return
        yield url, elements
# Scrape every listing page, printing each page URL as it is processed, and
# collect the product titles keyed by the URL of the page they came from.
results: dict[str, list[str]] = {}
for url, elements in walk():
    print(url)
    results[url] = elements
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement