from extract_emails import ExtractEmails from email.utils import parseaddr import re import cloudscraper from bs4 import BeautifulSoup def getEmail(source): e_mails =parseaddr(source) e_mails = [e for e in e_mails if "@" in e] print(e_mails) def getEmail(url): em = ExtractEmails("https://carealestate.herokuapp.com", depth=7) return em.emails def getPhone(source): phones_array = [] phones = re.findall(r'[\+\(]?[0-9][0-9 .\-\(\)]{8,}[0-9]', source) prefixes = ["1300", "1800", "02", "03", "04", "05", "07", "08", "612", "613", "614", "615", "617", "618"] for prefix in prefixes: for phone in phones: if phone.startswith(str(prefix)) and len(phone.strip().replace(" ", "").strip()) <= 13 and phone.strip().replace(" ", "").strip().isnumeric(): phones_array.append(phone) return phones_array #em = ExtractEmails("https://carealestate.herokuapp.com", depth=1) #print(em.emails) ''' Source =
Kuala Lumpur: +60 (0)3 2723 7900
Mutiara Damansara: +60 (0)3 2723 7900
Penang: + 60 (0)4 255 9000
Call us on: +6 (03) 8924 8000
+ 60 (7) 268-6200
Fax:
+60 (7) 228-6202
Phone:+601-4228-8055
'''
# Driver: download one page and print the phone numbers getPhone finds in it.
# NOTE(review): these are top-level statements with no __main__ guard, so the
# network request below also runs whenever this module is imported.
scraper = cloudscraper.CloudScraper(browser="chrome")
#url = "https://www.softwaretestinghelp.com/email-service-providers/amp/"
url = "https://aaa.edu.au"
r = scraper.get(url)
# Parse the response body with the "html.parser" backend.
soup = BeautifulSoup(r.text, "html.parser")
# Turn the parsed document back into a string so getPhone can scan it.
Source = str(soup)
print(getPhone(Source))