Advertisement
Guest User

Untitled

a guest
Jan 19th, 2020
108
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.53 KB | None | 0 0
  import re
  from urllib.parse import urljoin

  import requests
  from bs4 import BeautifulSoup
  4.  
  5. '''
  6. email=ast.jeroen%40hotmail.com
  7. # write something that selects the part after "=" and before "%40", and the part after "%40"
  8. '''
  9.  
  10.  
  # Shared script state: `allLinks` holds the candidate page links and
  # `mails` the e-mail addresses collected so far.
  allLinks = []
  mails = []
  url = 'http://www.hln.be'

  # Download the landing page and read the href of every anchor that has one.
  response = requests.get(url)
  soup = BeautifulSoup(response.text, 'html.parser')
  links = [a.attrs.get('href') for a in soup.select('a[href]')]

  # Keep only links that mention one of these keywords.  The comparison is
  # done on the lower-cased href so "Contact", "contact" and "CONTACT" are
  # all accepted.  (The previous condition contained the bare string
  # "Contact", which is always truthy, so every link passed the filter.)
  _LINK_KEYWORDS = ('contact', 'career', 'about', 'services')
  allLinks = {
      i for i in links
      if any(keyword in i.lower() for keyword in _LINK_KEYWORDS)
  }
  # Anchored pattern for anchor text that consists solely of an e-mail
  # address.  Raw string so that "\." is a regex escape, not a string escape.
  _EMAIL_RE = re.compile(r'[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$')


  def findMails(soup):
      """Print and record each new e-mail address that appears as link text.

      Every ``<a>`` element of ``soup`` is inspected.  When its text, with
      surrounding whitespace removed, is an e-mail address that is not yet in
      the module-level ``mails`` list, the address is printed and appended
      to that list.

      Args:
          soup: Parsed document providing ``find_all('a')``; each returned
              element must expose its text through a ``.text`` attribute.

      Returns:
          None. Results are reported via ``print`` and the ``mails`` list.
      """
      for anchor in soup.find_all('a'):
          # Strip BEFORE matching: the pattern is anchored at both ends, so
          # padding such as "\n  user@example.com  " would otherwise be
          # rejected and the address silently lost.
          candidate = anchor.text.strip()
          if not _EMAIL_RE.match(candidate):
              continue
          if candidate not in mails:
              print(candidate)
              mails.append(candidate)
  # Visit every candidate link and collect the addresses found on each page.
  for link in allLinks:
      if link.startswith("www"):
          # Host without a scheme: requests refuses URLs that lack one.
          target = "http://" + link
      else:
          # urljoin returns absolute URLs unchanged and resolves relative
          # ones against the start page, inserting the "/" that plain
          # string concatenation would miss.
          target = urljoin(url, link)

      if not target.startswith(("http://", "https://")):
          # hrefs such as mailto:, tel: or javascript: cannot be fetched.
          continue

      try:
          r = requests.get(target)
      except requests.RequestException as err:
          # One unreachable page should not abort the rest of the crawl.
          print("Skipping", target, "-", err)
          continue

      soup = BeautifulSoup(r.text, 'html.parser')
      findMails(soup)

  # Collapse the results to unique addresses and report an empty outcome.
  mails = set(mails)
  if not mails:
      print("NO MAILS FOUND")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement