Advertisement
Guest User

Untitled

a guest
Oct 17th, 2019
84
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.57 KB | None | 0 0
  1. import urllib.request
  2. from bs4 import BeautifulSoup
  3. from datetime import datetime
  4.  
  5. prefix = "https://www.ceneo.pl/"
  6. postfix = "/opinie-"
  7. product_id = "45498942"
  8. page_num = 1
  9.  
  10. url = prefix+product_id+postfix+str(page_num)
  11.  
  12. #pobranie zawartosci strony
  13. site = urllib.request.urlopen(url)
  14. page = site.read()
  15.  
  16. page_tree = BeautifulSoup(page, 'html.parser')
  17.  
  18. opinions_num = int(page_tree.find("span", attrs={"itemprop": "reviewCount"}).string)
  19. print(opinions_num)
  20.  
  21.  
  22. #parsowanie kodu strony
  23. # opinions = page_tree.find_all("li" , attrs={ "class": "review-box"})
  24. opinions = page_tree.select("li.review-box")
  25.  
  26. for opinion in opinions:
  27.    
  28.  
  29.     id = int(opinion["data-entry-id"])
  30.  
  31.     author = (opinion.select("div.reviewer-name-line")).pop().stringa
  32.     try:
  33.         recomendation = (opinion.select("div.product-review-summary > em")).pop().string
  34.     except IndexError:
  35.         recomendation = "BRAK"
  36.  
  37.     stars =  (opinion.select("span.review-score-count")).pop().string
  38.     content = (opinion.select("p.product-review-body")).pop().get_text()
  39.     useful = (opinion.select("[id^=votes-yes]")).pop().string
  40.     unuseful = (opinion.select("[id^=votes-no]")).pop().string
  41.     time = opinion.select("div > span.review-time > time")
  42.  
  43.     add_date = datetime.strptime(time.pop()["datetime"], "%Y-%m-%d %H:%M:%S")
  44.    
  45.     if time:
  46.         purchase_date = datetime.strptime(time.pop()["datetime"], "%Y-%m-%d %H:%M:%S")
  47.     else:
  48.         purchase_date = None
  49.  
  50.     adavnatges = (opinion.select("div.cons-cell > ul")).pop().get_text()
  51.    
  52.     print(adavnatges)
  53.     #print(author, id, recomendation, stars, content, useful, unuseful, end="\n")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement