Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
from __future__ import print_function

import bs4

# Exact value of the class attribute carried by each listing <div>.
LISTING_CLASS = "clearfix paid-listing result track-listing vcard"


def _stripped_text(parent, tag_name, css_class=None):
    """Return the stripped text of the first matching tag under *parent*.

    *css_class*, when given, is matched against the tag's class attribute.
    An empty string is returned when no such tag exists, so one incomplete
    listing does not abort the whole run with an AttributeError.
    """
    attrs = {'class': css_class} if css_class is not None else {}
    node = parent.find(tag_name, attrs)
    return node.text.strip() if node is not None else ""


# NOTE: trimming html.html beforehand so that it only contains the
# paid-listing <div> elements means less markup to parse; everything outside
# those divs is ignored by this script anyway.
with open('html.html') as f:
    html = f.read()

# Name the parser explicitly: without it bs4 picks whichever parser happens
# to be installed, which can yield different trees on different machines.
soup = bs4.BeautifulSoup(html, 'html.parser')

# Grab all the item entries.
entries = soup.find_all('div', {'class': LISTING_CLASS})

for i, entry in enumerate(entries):
    # The business name is the text of the first <h3> tag in the entry.
    business_name = _stripped_text(entry, 'h3')
    business_addy = _stripped_text(entry, 'span', 'street-address')
    business_phone = _stripped_text(entry, 'span', 'business-phone phone')

    print(" --- Entry {0} --- ".format(i))
    print("name:", business_name)
    print("address:", business_addy)
    print("phone:", business_phone)
    print("\n")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement