Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import urllib2
- from bs4 import BeautifulSoup
- import re
def extract_link(url):
    """Fetch ``url`` and return its contents parsed as a BeautifulSoup tree.

    Args:
        url: Address handed unchanged to ``urllib2.urlopen``.

    Returns:
        The ``BeautifulSoup`` object built from the response with the
        ``'html.parser'`` backend.
    """
    page = urllib2.urlopen(url)
    try:
        # BeautifulSoup consumes a file-like object inside its constructor,
        # so the response can be released as soon as the tree exists.
        return BeautifulSoup(page, 'html.parser')
    finally:
        # The response used to be left open; always release it.
        page.close()
def dineline(link, index=5):
    """Load a page, pick one review entry from it and print that entry.

    The raw markup has its ``<br>`` and ``</br>`` tags removed before it is
    parsed. Every ``div`` whose class is ``ui segment brtop`` is collected
    and the one at position ``index`` is passed to ``one_resto``.

    Args:
        link: URL handed unchanged to ``urllib2.urlopen``.
        index: Position of the entry to print. Defaults to 5, the value
            that used to be hard-coded.

    Raises:
        IndexError: If the page has no entry at ``index``.
    """
    page = urllib2.urlopen(link)
    try:
        page_text = page.read()
    finally:
        # The response used to be left open; always release it.
        page.close()

    # The tags are fixed literals, so plain replacement is enough (no regex).
    # Byte literals match what read() returns on both Python 2 and 3.
    cleaned = page_text.replace(b'<br>', b'').replace(b'</br>', b'')

    soup = BeautifulSoup(cleaned, 'html.parser')
    entries = soup.find_all('div', class_='ui segment brtop')
    if not -len(entries) <= index < len(entries):
        raise IndexError(
            'entry index {} is out of range: the page has {} entries'.format(
                index, len(entries)))

    # NOTE: restaurant name and review time print correctly; the rating is
    # still not extracted (see one_resto).
    one_resto(soup, entries[index])
def one_resto(soup, resto):
    """Print the restaurant name, review date and review text of one entry.

    Args:
        soup: Parsed document the entry belongs to. Not used here; kept so
            existing callers keep working.
        resto: Element for a single entry. It is only queried through
            ``find(...)`` and the ``attrs`` / ``string`` / ``previous``
            attributes of the elements that ``find`` returns.

    Returns:
        None. The formatted entry is written to standard output.
    """
    # The first link inside the 'content' div carries the restaurant name.
    name = resto.find('div', class_='content').find('a').string
    rev_time = resto.find('time').attrs[u'datetime']

    review_block = resto.find('div', class_='act-body mbot0')
    review_block = review_block.find('div', class_='rev-text')
    # The actual review text is the node just before the 'clear' div.
    review_text = review_block.find('div', class_='clear').previous

    # TODO: rating extraction is not working yet, so its slot in the output
    # stays empty. It was expected as the last word of the 'aria-label' of
    # the div with class 'left bold zdhl2 tooltip icon-font-level-5' inside
    # the review block.

    # A unicode template keeps non-ASCII names/reviews from raising
    # UnicodeEncodeError inside format() on Python 2. Encoding for the
    # output stream is left to print.
    print(u'Restaurant: {}, Rating: , Review Date: {}, '
          u'\n Review Text: {}'.format(name, rev_time, review_text))
def main():
    """Run ``dineline`` on the locally saved copy of the page."""
    # NOTE(review): raw string with doubled backslashes after ``file:`` --
    # confirm this form is what urlopen expects on the target machine.
    saved_page = r"file:\\C:\\Users\\cereal_killer\\PycharmProjects\\zomato\\dl.html"
    dineline(saved_page)
# Run main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Add Comment
Please sign in to add a comment.