Advertisement
Guest User

Untitled

a guest
Jul 24th, 2019
167
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.91 KB | None | 0 0
  1. from bs4 import BeautifulSoup as soup
  2. from urllib.request import urlopen as uReq
  3. import csv
  4.  
  5.  
  6.  
  7. my_url = 'https://www.yelp.com/search?find_desc=Restaurants&find_loc=San%20Francisco%2C%20CA'
  8. uClient = uReq(my_url)
  9. page_html = uClient.read()
  10. uClient.close()
  11. bs = soup(page_html, "html.parser")
  12.  
  13.  
  14. f = csv.writer(open('yelp_data.csv', 'w'))
  15. f.writerow(['restaurant_title', 'restaurant_address', 'restaurant_numReview', 'restaurant_starCount', 'restaurant_price', 'restaurant_category', 'restaurant_district'])
  16.  
  17.  
  18. yelp_containers = bs.findAll("div", {"class": "lemon--div__373c0__1mboc searchResult__373c0__1yggB border-color--default__373c0__2xHhl"})
  19.  
  20.  
  21. for yelp_container, item in enumerate(yelp_containers):
  22.  
  23.         print("restaurant number #", yelp_container + 1)
  24.  
  25.         restaurant_title = yelp_containers[yelp_container].find("a", {"class": "lemon--a__373c0__IEZFH link__373c0__29943 link-color--blue-dark__373c0__1mhJo link-size--inherit__373c0__2JXk5"}).text
  26.         print("restaurant_title: ", restaurant_title)
  27.  
  28.         # AttributeError: 'NoneType' object has no attribute 'get_text'
  29.         restaurant_address = yelp_containers[yelp_container].find("address", {"class": 'lemon--address__373c0__2sPac'}).get_text()
  30.         print("restaurant_address: ", restaurant_address)
  31.  
  32.         restaurant_numReview = yelp_containers[yelp_container].find("span", {"class": "lemon--span__373c0__3997G text__373c0__2pB8f reviewCount__373c0__2r4xT text-color--mid__373c0__3G312 text-align--left__373c0__2pnx_"}).get_text()
  33.         print("restaurant_numReview: ", restaurant_numReview)
  34.  
  35.         # TypeError: 'NoneType' object is not subscriptable
  36.         restaurant_starCount = yelp_containers[yelp_container].find("div", {"class": "lemon--div__373c0__1mboc i-stars__373c0__30xVZ i-stars--regular-4__373c0__2R5IO border-color--default__373c0__2oFDT overflow--hidden__373c0__8Jq2I"})['aria-label']
  37.         print("restaurant_starCount: ", restaurant_starCount)
  38.  
  39.         restaurant_price = yelp_containers[yelp_container].find("div", {"class": "lemon--div__373c0__1mboc priceCategory__373c0__3zW0R border-color--default__373c0__2xHhl"}).find('span').text
  40.         print("restaurant_price: ", restaurant_price)
  41.  
  42.         restaurant_category = yelp_containers[yelp_container].find("span", {"class": "lemon--span__373c0__3997G display--inline__373c0__3nr-o border-color--default__373c0__2xHhl"}).text
  43.         print("restaurant_category: ", restaurant_category)
  44.  
  45.         # AttributeError: 'NoneType' object has no attribute 'text'
  46.         restaurant_district = yelp_containers[yelp_container].find("div", {"class": "lemon--div__373c0__1mboc display--inline-block__373c0__25zhW border-color--default__373c0__2xHhl"}).text
  47.         print("restaurant_district: ", restaurant_district)
  48.  
  49.         f.writerow([restaurant_title, restaurant_address, restaurant_numReview, restaurant_starCount, restaurant_price, restaurant_category, restaurant_district])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement