Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Scan a site's "contact" pages and report link targets that contain "@".
#
# The script fetches the home page, follows every anchor whose href mentions
# "contact", and prints each regex match (plus its capture groups) found in
# the hrefs of the linked page.
import re
import urllib2
import urlparse
from contextlib import closing

from BeautifulSoup import BeautifulSoup

# Matches any href containing an "@"; group 1 spans the whole match.
regex = r"(.*@.*)"
# Compiled once so the pattern is not looked up again for every anchor.
pattern = re.compile(regex)

base_url = "http://www.optimumgym.co.uk/"

# closing() releases the HTTP response as soon as the page has been parsed.
with closing(urllib2.urlopen(base_url)) as html_page:
    soup = BeautifulSoup(html_page)

for link in soup.findAll('a'):
    current_link = link.get('href')
    # Anchors without an href yield None; treat them like non-contact links
    # instead of crashing on None.find(...).
    if current_link and "contact" in current_link:
        # Resolve relative hrefs (e.g. "/contact") against the home page,
        # since urlopen() rejects URLs that lack a scheme.
        contact_url = urlparse.urljoin(base_url, current_link)
        with closing(urllib2.urlopen(contact_url)) as current_page:
            soup2 = BeautifulSoup(current_page)
        for link2 in soup2.findAll('a'):
            suspect_text = link2.get('href')
            if not suspect_text:
                # No href (or an empty one) cannot contain a match.
                continue
            # Match and group numbers are reported 1-based.
            for matchNum, match in enumerate(pattern.finditer(suspect_text), 1):
                print("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))
                for groupNum in range(1, len(match.groups()) + 1):
                    print("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))
    else:
        # NOTE(review): the original indentation was lost; this branch is
        # assumed to pair with the "contact" check above - confirm.
        print("nothing here")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement