JoaquinFioriti

Untitled

Aug 29th, 2020
84
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.83 KB | None | 0 0
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
  3.  
  4. baseurl = 'https://www.entertainmentearth.com/'
  5.  
  6. headers = {
  7. 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36'
  8. }
  9. productLinks = []
  10. for x in range(1,3):
  11. if (x==1):
  12. r = requests.get('https://www.entertainmentearth.com/s/action-figures/p?query1=Action%20Figures&producttype=action-figures&landingpageid=5397&selectedfacets=__Theme=One+Piece%7CDragon+Ball&sort=bestsellers')
  13. else:
  14. r = requests.get('https://www.entertainmentearth.com/s/action-figures/p?query1=Action%20Figures&producttype=action-figures&landingpageid=5397&selectedfacets=__Theme=Dragon+Ball%7COne+Piece&page=2&sort=bestsellers')
  15. soup = BeautifulSoup(r.content, 'lxml')
  16. productList = soup.find_all('div', class_='grid-view item col-sm-4 col-lg-4 col-md-4 col-xs-12 product-tile')
  17.  
  18. for item in productList:
  19. for link in item.find_all('div', class_='image'):
  20. url=link.find('a')
  21. productLinks.append(baseurl + url['href'])
  22.  
  23. print(len(productLinks))
  24.  
  25.  
  26. x=0
  27. for link in productLinks:
  28.  
  29. print(x)
  30. r = requests.get(link, headers=headers)
  31. soup = BeautifulSoup(r.content,'lxml')
  32. post_title = soup.find('h1').text.strip()
  33. sku = soup.find('h5',class_='item-sku')
  34.  
  35. try:
  36. sku = sku.text
  37. sku = sku.split(':')[1].strip()
  38. except Exception as e:
  39. sku = None
  40.  
  41. regular_price = soup.find('span', class_='product-price')
  42. try:
  43. regular_price = regular_price.text
  44. regular_price = regular_price.split('$')[1]
  45. regular_price = float(regular_price)*3
  46. sale_price = regular_price*0.6
  47. except Exception as e:
  48. regular_price = None
  49. sale_price = None
  50.  
  51. figure = {
  52. 'post_title': post_title,
  53. 'sku': sku,
  54. 'regular_price':regular_price,
  55. 'sale_price':sale_price
  56. }
  57. print(figure)
  58.  
  59. x=x+1
Add Comment
Please, Sign In to add comment