Advertisement
Guest User

Untitled

a guest
Aug 10th, 2019
165
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.28 KB | None | 0 0
  1. import requests
  2. import random
  3. import csv
  4. import time
  5. from bs4 import BeautifulSoup
  6.  
  7. headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'}
  8.  
  9.  
  10. f = csv.writer(open('zomato_data.csv', 'w'))
  11. f.writerow(['title', 'address', 'district', 'cost_for_two', 'cuisines', 'rating', 'numVotes'])
  12.  
  13.  
  14. response = requests.get("https://www.zomato.com/san-francisco/restaurants?page=807", headers=headers)
  15. content = response.content
  16. bs = BeautifulSoup(content, "html.parser")
  17.  
  18. zomato_containers = bs.find_all("div", {"class": "search-snippet-card"})
  19.  
  20. for zomato_container in zomato_containers:
  21.  
  22.     title = zomato_container.find("a", {"class": "result-title"}).get_text()
  23.  
  24.     try:
  25.         address = zomato_container.find("div", {"class": "search-result-address"}).get_text()
  26.         if address is None:
  27.             address = 'N/A'
  28.         district = zomato_container.find("a", {"class": "search_result_subzone"}).get_text()
  29.         if district is None:
  30.             district = 'N/A'
  31.         cost_for_two = zomato_container.select_one('[class*="col-s-11 col-m-12 pl0"]').get_text(separator='|', strip=True).split('|')
  32.         cost_for_two = cost_for_two[1] if len(cost_for_two) > 1 else cost_for_two[0]
  33.         if cost_for_two is None:
  34.             cost_for_two = 'N/A'
  35.         cuisines = zomato_container.find("div", {"class": "res-snippet-small-establishment mt5"}).get_text()
  36.         if cuisines is None:
  37.             cuisines = 'N/A'
  38.         rating = zomato_container.select_one('.rating-popup').text.strip()
  39.         if rating is None:
  40.             rating = 'N/A'
  41.         numVotes = zomato_container.select_one('[class^=rating-votes-div]').text  # match on elements with class attribute whose values starts with rating-votes-div
  42.         if numVotes  is None:
  43.             numVotes = 'N/A'
  44.  
  45.     except AttributeError:
  46.         continue
  47.  
  48.     print("restaurant_title: ", title)
  49.     print("restaurant_address: ", address)
  50.     print("restaurant_district: ", district)
  51.     print("cost_for_two: ", cost_for_two)
  52.     print("restaurant_cuisines: ", cuisines)
  53.     print("rating: ", rating)
  54.     print("numVotes: ", numVotes)
  55.  
  56.     f.writerow([title, address, district, cost_for_two, cuisines, rating, numVotes])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement