Advertisement
collinsanele

Ben New

May 30th, 2020
719
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.87 KB | None | 0 0
  1. from extract_emails import ExtractEmails
  2. from email.utils import parseaddr
  3. import re
  4. import cloudscraper
  5. from bs4 import BeautifulSoup
  6.  
  7.  
  8.  
  9. def getEmail(source):
  10.     e_mails =parseaddr(source)
  11.     e_mails = [e for e in e_mails if "@" in e]
  12.     print(e_mails)
  13.    
  14.    
  15.    
  16. def getEmail(url):
  17.     em = ExtractEmails("https://carealestate.herokuapp.com", depth=7)
  18.    
  19.     return em.emails
  20.        
  21.    
  22.  
  23.  
  24.  
  25. def getPhone(source):
  26.     phones_array = []
  27.     phones = re.findall(r'[\+\(]?[0-9][0-9 .\-\(\)]{8,}[0-9]', source)
  28.    
  29.     prefixes = ["1300", "1800",
  30.     "02", "03", "04", "05", "07", "08",
  31.     "612", "613", "614", "615", "617", "618"]
  32.    
  33.     for prefix in prefixes:
  34.         for phone in phones:
  35.             if phone.startswith(str(prefix)) and len(phone.strip().replace(" ", "").strip()) <= 13 and phone.strip().replace(" ", "").strip().isnumeric():
  36.                 phones_array.append(phone)
  37.                
  38.     return phones_array
  39.    
  40.  
  41.  
  42. #em = ExtractEmails("https://carealestate.herokuapp.com", depth=1)
  43.  
  44. #print(em.emails)
  45.  
  46.  
  47. '''
  48. Source = <p><strong>Kuala Lumpur</strong><strong>:</strong> +60 (0)3 2723 7900</p>
  49.        <p><strong>Mutiara Damansara:</strong> +60 (0)3 2723 7900</p>
  50.        <p><strong>Penang:</strong> + 60 (0)4 255 9000</p>
  51.        <h2>Where we are </h2>
  52.        <strong>&nbsp;Call us on:</strong>&nbsp;+6 (03) 8924 8686
  53.        </p></div><div class="sys_two">
  54.    <h3 class="parentSchool">General enquiries</h3><p style="FONT-SIZE: 11px">
  55.     <strong>&nbsp;Call us on:</strong>&nbsp;+6 (03) 8924 8000
  56. + 60 (7) 268-6200 <br />
  57. Fax:<br />
  58. +60 (7) 228-6202<br />
  59. Phone:</strong><strong style="color: #f00">+601-4228-8055</strong>
  60.  
  61. '''
  62.  
  63.  
  64.  
# Script section: download one page and print the phone numbers found in it.
# Everything below runs at import time.

# cloudscraper session configured with the "chrome" browser profile.
scraper = cloudscraper.CloudScraper(browser="chrome")


#url = "https://www.softwaretestinghelp.com/email-service-providers/amp/"


# Page whose markup is scanned for phone numbers.
url = "https://aaa.edu.au"


# Fetch the page; the response status is not checked here.
r = scraper.get(url)


# Parse the response text with the built-in "html.parser" backend ...
soup = BeautifulSoup(r.text, "html.parser")

# ... and turn the parsed tree back into a string for the regex-based scan.
Source = str(soup)

# Print the list returned by getPhone for the page markup.
print(getPhone(Source))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement