Advertisement
Guest User

Untitled

a guest
Feb 27th, 2017
76
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.04 KB | None | 0 0
  1. from urllib.request import urlopen as uReq
  2. from bs4 import BeautifulSoup as soup
  3.  
  4. my_url = 'https://www.newegg.com/Video-Cards-Video-Devices/Category/ID-38? Tpk=graphics%20cards'
  5.  
  6. # opening up connection, grabbing the page
  7. uClient = uReq(my_url)
  8. page_html = uClient.read()
  9. uClient.close()
  10.  
  11. # html parsing
  12. page_soup = soup(page_html, "html.parser")
  13.  
  14. # grabs each product
  15. containers = page_soup.findAll("div", {"class":"item-container"})
  16.  
  17.  
  18. filename = "products.csv"
  19. f = open(filename, "w")
  20.  
  21. headers = "brand, product_name, shippingn"
  22.  
  23. f.write(headers)
  24.  
  25.  
  26. for container in containers:
  27. brand = container.div.div.a.img["title"]
  28.  
  29. title_container = container.findAll("a",{"class":"item-title"})
  30. product_name = title_container[0].text
  31.  
  32. shipping_container = container.findAll("li", {"class":"price-ship"})
  33. shipping = shipping_container[0].text.strip()
  34.  
  35. print("brand: " + brand)
  36. print("product_name: " + product_name)
  37. print("shipping: " + shipping + "n")
  38.  
  39. f.write(brand + "," + product_name.replace(",", "|") + "," + shipping + "n")
  40.  
  41. f.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement