Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Scrape action-figure listings from entertainmentearth.com.

The script walks two listing pages, collects the product-page links found
in each product tile, then visits every product page and prints a dict with
the product title, SKU and two prices derived from the listed price.
"""
import re
from urllib.parse import urljoin

from bs4 import BeautifulSoup
import requests

baseurl = 'https://www.entertainmentearth.com/'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36'
}

# Listing pages to crawl, in order (page 1, then page 2).
LISTING_URLS = (
    'https://www.entertainmentearth.com/s/action-figures/p?query1=Action%20Figures&producttype=action-figures&landingpageid=5397&selectedfacets=__Theme=One+Piece%7CDragon+Ball&sort=bestsellers',
    'https://www.entertainmentearth.com/s/action-figures/p?query1=Action%20Figures&producttype=action-figures&landingpageid=5397&selectedfacets=__Theme=Dragon+Ball%7COne+Piece&page=2&sort=bestsellers',
)

# Exact class attribute of a product tile on the listing pages.
TILE_CLASS = 'grid-view item col-sm-4 col-lg-4 col-md-4 col-xs-12 product-tile'

# Seconds to wait for the server before giving up on a request, so a stalled
# connection cannot hang the script forever.
REQUEST_TIMEOUT = 30

# regular_price = listed price * PRICE_MARKUP
# sale_price    = regular_price * SALE_FACTOR
PRICE_MARKUP = 3
SALE_FACTOR = 0.6

# First dollar amount in a price string, e.g. "$1,299.99" -> "1,299.99".
_PRICE_RE = re.compile(r'\$\s*([\d,]*\.?\d+)')


def _fetch_soup(url):
    """Download *url* with the shared headers and return the parsed page.

    Network errors (including ``requests.Timeout``) propagate to the caller.
    """
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    return BeautifulSoup(response.content, 'lxml')


def _collect_product_links(soup):
    """Return the absolute product URLs found in the tiles of a listing page."""
    links = []
    for tile in soup.find_all('div', class_=TILE_CLASS):
        for image_div in tile.find_all('div', class_='image'):
            anchor = image_div.find('a')
            # Skip image blocks without a usable link instead of crashing.
            if anchor is None or not anchor.get('href'):
                continue
            # urljoin copes with root-relative, relative and absolute hrefs,
            # and avoids the "//" that plain concatenation produces when the
            # href starts with "/" and baseurl ends with "/".
            links.append(urljoin(baseurl, anchor['href']))
    return links


def _parse_title(soup):
    """Return the stripped text of the first <h1>, or None if there is none."""
    heading = soup.find('h1')
    if heading is None:
        return None
    return heading.text.strip()


def _parse_sku(soup):
    """Return the text following the first ':' in the SKU element, or None."""
    tag = soup.find('h5', class_='item-sku')
    if tag is None:
        return None
    parts = tag.text.split(':')
    if len(parts) < 2:
        return None
    return parts[1].strip()


def _parse_prices(soup):
    """Return ``(regular_price, sale_price)`` derived from the listed price.

    Both values are None when the price element is missing or contains no
    dollar amount. Thousands separators in the listed price are ignored.
    """
    tag = soup.find('span', class_='product-price')
    if tag is None:
        return None, None
    match = _PRICE_RE.search(tag.text)
    if match is None:
        return None, None
    listed_price = float(match.group(1).replace(',', ''))
    regular_price = listed_price * PRICE_MARKUP
    return regular_price, regular_price * SALE_FACTOR


# Step 1: gather product links from every listing page.
productLinks = []
for listing_url in LISTING_URLS:
    productLinks.extend(_collect_product_links(_fetch_soup(listing_url)))
print(len(productLinks))

# Step 2: visit each product page and print the extracted record.
for index, link in enumerate(productLinks):
    print(index)  # progress counter
    soup = _fetch_soup(link)
    regular_price, sale_price = _parse_prices(soup)
    figure = {
        'post_title': _parse_title(soup),
        'sku': _parse_sku(soup),
        'regular_price': regular_price,
        'sale_price': sale_price,
    }
    print(figure)
Add Comment
Please, Sign In to add comment