Advertisement
Guest User

-_-

a guest
Oct 25th, 2016
61
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.17 KB | None | 0 0
import re
import urllib2
from urlparse import urljoin

from BeautifulSoup import BeautifulSoup
  4. regex = r"(.*@.*)"
  5. html_page = urllib2.urlopen("http://www.optimumgym.co.uk/")
  6. soup = BeautifulSoup(html_page)
  7. for link in soup.findAll('a'):
  8. current_link = link.get('href')
  9. if current_link.find("contact") > -1:
  10. current_page = urllib2.urlopen(current_link)
  11. soup2 = BeautifulSoup(current_page)
  12. for link2 in soup2.findAll('a'):
  13. suspect_text = link2.get('href')
  14. matches = re.finditer(regex, suspect_text)
  15. for matchNum, match in enumerate(matches):
  16. matchNum = matchNum + 1
  17.  
  18. print ("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))
  19.  
  20. for groupNum in range(0, len(match.groups())):
  21. groupNum = groupNum + 1
  22.  
  23. print ("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))
  24.  
  25. else:
  26. print "nothing here"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement