Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from bs4 import BeautifulSoup as soup
- from urllib.request import urlopen as uReq
- import csv
# Scrape the first page of Yelp restaurant search results and save one CSV row
# per result card. Relies on the file-level imports: `soup` (BeautifulSoup),
# `uReq` (urllib.request.urlopen) and `csv`.

my_url = 'https://www.yelp.com/search?find_desc=Restaurants&find_loc=San%20Francisco%2C%20CA'
OUTPUT_CSV = 'yelp_data.csv'

# Column order of the CSV; parse_restaurant() returns values in this order.
FIELD_NAMES = [
    'restaurant_title',
    'restaurant_address',
    'restaurant_numReview',
    'restaurant_starCount',
    'restaurant_price',
    'restaurant_category',
    'restaurant_district',
]

# CSS class strings used to locate each element inside a result card.
# NOTE(review): the hashed suffixes (e.g. "__373c0__1mboc") look build-specific
# and will presumably change when Yelp redeploys -- confirm before relying on
# them long-term.
RESULT_CLASS = "lemon--div__373c0__1mboc searchResult__373c0__1yggB border-color--default__373c0__2xHhl"
TITLE_CLASS = "lemon--a__373c0__IEZFH link__373c0__29943 link-color--blue-dark__373c0__1mhJo link-size--inherit__373c0__2JXk5"
ADDRESS_CLASS = 'lemon--address__373c0__2sPac'
REVIEW_COUNT_CLASS = "lemon--span__373c0__3997G text__373c0__2pB8f reviewCount__373c0__2r4xT text-color--mid__373c0__3G312 text-align--left__373c0__2pnx_"
# Only the rating-independent class is used here. The previous selector also
# required "i-stars--regular-4...", and because BeautifulSoup treats a
# multi-class string as an exact match of the whole class attribute, every
# card that was not rated 4 stars yielded None (-> TypeError on ['aria-label']).
STARS_CLASS = "i-stars__373c0__30xVZ"
PRICE_CLASS = "lemon--div__373c0__1mboc priceCategory__373c0__3zW0R border-color--default__373c0__2xHhl"
CATEGORY_CLASS = "lemon--span__373c0__3997G display--inline__373c0__3nr-o border-color--default__373c0__2xHhl"
DISTRICT_CLASS = "lemon--div__373c0__1mboc display--inline-block__373c0__25zhW border-color--default__373c0__2xHhl"


def fetch_html(url):
    """Download *url* and return the raw response body.

    The response is used as a context manager so the connection is closed
    even if reading fails.
    """
    with uReq(url) as response:
        return response.read()


def node_text(node, default=""):
    """Return the text of a parsed element, or *default* if *node* is None.

    ``find()`` returns None when nothing matches; guarding here avoids the
    "'NoneType' object has no attribute 'get_text'" crash on cards that lack
    an element (e.g. no address or no district).
    """
    if node is None:
        return default
    return node.get_text()


def node_attr(node, name, default=""):
    """Return attribute *name* of *node*, or *default* if either is missing."""
    if node is None:
        return default
    return node.get(name, default)


def parse_restaurant(container):
    """Extract one CSV row from a search-result card.

    Args:
        container: an element exposing ``find(tag, attrs)`` (a BeautifulSoup
            tag for one search result).

    Returns:
        A list of seven strings ordered like ``FIELD_NAMES``. Fields whose
        element is absent from the card are returned as empty strings.
    """
    title = node_text(container.find("a", {"class": TITLE_CLASS}))
    address = node_text(container.find("address", {"class": ADDRESS_CLASS}))
    num_reviews = node_text(container.find("span", {"class": REVIEW_COUNT_CLASS}))
    star_count = node_attr(
        container.find("div", {"class": STARS_CLASS}), "aria-label"
    )

    # The price lives in a <span> nested inside the price/category wrapper;
    # either level may be missing.
    price_box = container.find("div", {"class": PRICE_CLASS})
    price_span = price_box.find('span') if price_box is not None else None
    price = node_text(price_span)

    category = node_text(container.find("span", {"class": CATEGORY_CLASS}))
    district = node_text(container.find("div", {"class": DISTRICT_CLASS}))

    return [title, address, num_reviews, star_count, price, category, district]


def main():
    """Fetch the search page, print every result and write them to the CSV."""
    page_html = fetch_html(my_url)
    bs = soup(page_html, "html.parser")
    yelp_containers = bs.findAll("div", {"class": RESULT_CLASS})

    # newline='' is what the csv module expects (prevents blank rows on
    # Windows); the with-block guarantees the file is flushed and closed.
    with open(OUTPUT_CSV, 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(FIELD_NAMES)

        for number, container in enumerate(yelp_containers, start=1):
            print("restaurant number #", number)
            row = parse_restaurant(container)
            for field_name, value in zip(FIELD_NAMES, row):
                print(field_name + ": ", value)
            writer.writerow(row)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement