Advertisement
TankorSmash

yellowpagescrapper.py

Nov 8th, 2012
152
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.98 KB | None | 0 0
  import bs4


  def _field_text(entry, name, attrs=None):
      """Return the stripped text of the first matching tag inside *entry*.

      ``entry.find`` returns ``None`` when a listing lacks the requested tag
      (for example, a business with no published phone number).  An empty
      string is returned in that case so one incomplete listing does not
      abort the whole run with an ``AttributeError``.
      """
      tag = entry.find(name, attrs or {})
      if tag is None:
          return ""
      return tag.text.strip()


  # Performance tip from the original author: trim html.html beforehand so it
  # only contains the "clearfix paid-listing" divs; everything outside them is
  # parsed for nothing.
  with open('html.html') as f:
      html = f.read()

  # Name the parser explicitly: without it bs4 picks whichever parser happens
  # to be installed (and warns), so results could differ between machines.
  soup = bs4.BeautifulSoup(html, "html.parser")

  # Grab all the item entries.  A multi-class string passed as 'class' is
  # matched against the exact value of the class attribute.
  entries = soup.find_all(
      'div', {'class': "clearfix paid-listing result track-listing vcard"}
  )

  for i, entry in enumerate(entries):
      # The business name is the text of the first h3 tag in the entry.
      business_name = _field_text(entry, 'h3')
      # Street address and phone number live in dedicated spans.
      business_addy = _field_text(entry, 'span', {'class': 'street-address'})
      business_phone = _field_text(
          entry, 'span', {'class': 'business-phone phone'}
      )

      # Single-argument print(...) calls produce identical output on
      # Python 2 (print statement) and Python 3 (print function).
      print(" --- Entry {0} --- ".format(i))
      print("name: " + business_name)
      print("address: " + business_addy)
      print("phone: " + business_phone)
      print("\n")

  # End-of-run marker kept from the original script.
  print('asd')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement