Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import bs4
- import requests
def get_url(page):
    """Return the kazar.com listing URL for the given page.

    ``page`` is substituted into the trailing ``strona,{}`` segment of the
    URL via ``str.format``.
    """
    return r'http://www.kazar.com/pl/sklep/kobieta/buty/filtry/strona,{}'.format(page)
if __name__ == '__main__':
    # Walk the paginated listing and collect the href of every anchor with
    # class "pr-see". Stop at the first page that contributes no link that
    # has not already been collected, then print the whole set.
    page = 1
    links = set()
    while True:
        url = get_url(page)
        # A timeout keeps the script from hanging forever on a stalled
        # connection (requests waits indefinitely by default).
        response = requests.get(url, timeout=30)
        if response.status_code != 200:
            # Any non-200 response aborts the run before anything is printed.
            raise Exception('Bad status code {}'.format(response.status_code))
        soup = bs4.BeautifulSoup(response.content, 'html.parser')
        items = soup.find_all('a', class_='pr-see')
        before = len(links)
        # The set de-duplicates, so its size only grows for unseen links.
        links.update(item['href'] for item in items)
        if len(links) == before:
            break
        # Advance to the next page; without this the loop re-fetched page 1
        # and stopped after the second iteration.
        page += 1
    print(links)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement