Advertisement
Guest User

Untitled

a guest
May 6th, 2022
81
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.21 KB | None | 0 0
import csv
from datetime import datetime

import bs4
import requests
  5.  
  6. USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.3 Safari/605.1.15"
  7. REQUEST_HEADER = {
  8.     "User-Agent": USER_AGENT,
  9.     "Accept-Language": "en-US, en;q=0.5"
  10. }
  11.  
  12. def get_page_html(url):
  13.     res = requests.get(url=url, headers=REQUEST_HEADER) #res = response
  14.     return res.content
  15.  
  16. def get_product_price(soup):
  17.     main_price_span = soup.find("span", attrs={
  18.         "class": "a-price aok-align-center reinventPricePriceToPayPadding priceToPay"
  19.     })
  20.     price_spans = main_price_span.findAll("span")
  21.     for span in price_spans:
  22.         price = span.text.strip().replace("$", "").replace(",", "")
  23.         print(price)
  24.  
  25. def extract_product_info(url):
  26.     product_info = {}
  27.     print(f"Scraping URL: {url}")
  28.     html = get_page_html(url)
  29.     soup = bs4.BeautifulSoup(html, "lxml")
  30.     product_info["price"] = get_product_price(soup)
  31.  
  32. if __name__ == '__main__':
  33.     with open("amazon_products_urls.csv", newline="") as csvfile:
  34.         reader = csv.reader(csvfile, delimiter=",")
  35.         for row in reader:
  36.             url = row[0]
  37.         print(extract_product_info(url))
  38.  
  39.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement