Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from datetime import datetime
- import requests
- import csv
- import bs4
# Browser identification string (Safari 15.3 on macOS) used as the
# User-Agent header value below.
USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.3 Safari/605.1.15"

# HTTP headers passed to requests.get() for every page fetch.
REQUEST_HEADER = {
    "User-Agent": USER_AGENT,
    "Accept-Language": "en-US, en;q=0.5",
}
def get_page_html(url, timeout=10):
    """Fetch *url* and return the raw response body.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. It is
            passed straight to ``requests.get``; without it a stalled
            connection would block the script indefinitely.

    Returns:
        The response body as bytes (``Response.content``).

    Raises:
        requests.exceptions.RequestException: On connection errors or
            when the timeout expires.
    """
    res = requests.get(url=url, headers=REQUEST_HEADER, timeout=timeout)
    return res.content
def get_product_price(soup):
    """Return the product price found in *soup* as a float, or None.

    Locates the ``<span>`` matching the price class string below, then
    scans its nested ``<span>`` elements. Each candidate's text is
    stripped of whitespace, ``$`` and thousands separators; the first one
    that parses as a number is returned.

    Args:
        soup: Parsed page exposing ``find`` (e.g. a BeautifulSoup object).

    Returns:
        The price as a float, or None when the price container is absent
        or none of its nested spans holds a parseable number.
    """
    main_price_span = soup.find("span", attrs={
        "class": "a-price aok-align-center reinventPricePriceToPayPadding priceToPay"
    })
    # The container is missing when the page layout differs or the request
    # was served a different page; report "no price" instead of crashing.
    if main_price_span is None:
        return None

    for span in main_price_span.find_all("span"):
        price = span.text.strip().replace("$", "").replace(",", "")
        try:
            return float(price)
        except ValueError:
            # Not a number (e.g. an empty or symbol-only span); try the next.
            continue
    return None
def extract_product_info(url):
    """Scrape the product page at *url* and return the collected fields.

    Downloads the page with ``get_page_html``, parses it with
    BeautifulSoup using the ``lxml`` parser, and stores the result of
    ``get_product_price`` under the ``"price"`` key.

    Args:
        url: Address of the product page to scrape.

    Returns:
        A dict with a single ``"price"`` entry holding whatever
        ``get_product_price`` returned for the page.
    """
    product_info = {}
    print(f"Scraping URL: {url}")
    html = get_page_html(url)
    soup = bs4.BeautifulSoup(html, "lxml")
    product_info["price"] = get_product_price(soup)
    # Hand the collected data back; without this the caller only sees None.
    return product_info
if __name__ == '__main__':
    # Read product URLs from the first column of the CSV file and print
    # the scraped info for each one.
    with open("amazon_products_urls.csv", newline="") as csvfile:
        reader = csv.reader(csvfile, delimiter=",")
        for row in reader:
            # csv.reader yields an empty list for blank lines; indexing it
            # would raise IndexError, so skip rows without a URL.
            if not row or not row[0].strip():
                continue
            url = row[0]
            print(extract_product_info(url))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement