Advertisement
Guest User

Untitled

a guest
Apr 28th, 2017
88
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.72 KB | None | 0 0
  1. import bs4
  2. import requests
  3.  
  4.  
  5. def get_url(page):
  6.     url_pattern = r'http://www.kazar.com/pl/sklep/kobieta/buty/filtry/strona,{}'
  7.     return url_pattern.format(page)
  8.  
  9. if __name__ == '__main__':
  10.     page = 1
  11.  
  12.     links = set()
  13.  
  14.     while True:
  15.         url = get_url(page)
  16.         response = requests.get(url)
  17.  
  18.         if response.status_code != 200:
  19.             raise Exception('Bad status code {}'.format(response.status_code))
  20.  
  21.         soup = bs4.BeautifulSoup(response.content, 'html.parser')
  22.  
  23.         items = soup.find_all('a', class_='pr-see')
  24.  
  25.         before = len(links)
  26.  
  27.         for item in items:
  28.             links.add(item['href'])
  29.  
  30.         if len(links) == before:
  31.             break
  32.  
  33.     print(links)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement