"""Scrape a web page and print the phone numbers found in its HTML."""

from email.utils import parseaddr
import re

import cloudscraper
from bs4 import BeautifulSoup
from extract_emails import ExtractEmails

# Candidate phone numbers: an optional leading "+" or "(", a digit, at least
# eight digits/separators (space, ".", "-", parentheses), then a final digit.
_PHONE_PATTERN = re.compile(r'[\+\(]?[0-9][0-9 .\-\(\)]{8,}[0-9]')

# Only candidates starting with one of these prefixes are kept.
# NOTE(review): these look like Australian service/area/country-code
# prefixes -- confirm against the intended target numbers.
_PHONE_PREFIXES = (
    "1300", "1800",
    "02", "03", "04", "05", "07", "08",
    "612", "613", "614", "615", "617", "618",
)

# Upper bound on the number of characters left once spaces are removed.
_MAX_PHONE_DIGITS = 13


def getEmailFromAddress(source):
    """Print and return the parts of a parsed address that contain "@".

    This function was originally also named ``getEmail`` and was shadowed
    (made unreachable) by the URL-based ``getEmail`` defined after it; it
    has been renamed so that both are callable.

    Args:
        source: Text handed to ``email.utils.parseaddr``.

    Returns:
        The elements of the ``parseaddr`` result that contain ``"@"``.
    """
    e_mails = [part for part in parseaddr(source) if "@" in part]
    print(e_mails)
    return e_mails


def getEmail(url, depth=7):
    """Return the e-mail addresses ``ExtractEmails`` collects from ``url``.

    The original implementation ignored ``url`` and always crawled a
    hard-coded site; the argument is now honoured.

    Args:
        url: Address handed to ``ExtractEmails`` as the starting page.
        depth: Value forwarded as the ``depth`` argument of
            ``ExtractEmails`` (7, as in the original call).

    Returns:
        The ``emails`` attribute of the ``ExtractEmails`` instance.
    """
    em = ExtractEmails(url, depth=depth)
    return em.emails


def _is_plain_number(phone):
    """Return True if ``phone`` is only digits and spaces, within the limit."""
    digits = phone.replace(" ", "")
    return len(digits) <= _MAX_PHONE_DIGITS and digits.isnumeric()


def getPhone(source):
    """Extract phone numbers with a known prefix from ``source``.

    A candidate is kept only when it starts with one of ``_PHONE_PREFIXES``,
    consists solely of digits and spaces, and has at most
    ``_MAX_PHONE_DIGITS`` characters once the spaces are removed.
    Candidates containing "+", "(", ")", "." or "-" are therefore dropped,
    even though the search pattern matches them.

    Args:
        source: Text (for example raw HTML) to search. Empty or ``None``
            input yields an empty list.

    Returns:
        The matching substrings exactly as they appear in ``source``,
        grouped in prefix order and, within a prefix, in document order.
        Repeated occurrences are kept.
    """
    if not source:
        return []

    # The digit/length check does not depend on the prefix, so run it once
    # per candidate instead of once per (prefix, candidate) pair.
    candidates = [
        phone
        for phone in _PHONE_PATTERN.findall(source)
        if _is_plain_number(phone)
    ]

    # Iterate prefixes in the outer loop to keep the original result order.
    return [
        phone
        for prefix in _PHONE_PREFIXES
        for phone in candidates
        if phone.startswith(prefix)
    ]


#em = ExtractEmails("https://carealestate.herokuapp.com", depth=1)
#print(em.emails)

# Sample page text kept in an unused string literal (it has no runtime effect).
'''
Source =

Kuala Lumpur: +60 (0)3 2723 7900

Mutiara Damansara: +60 (0)3 2723 7900

Penang: + 60 (0)4 255 9000

Where we are

Call us on: +6 (03) 8924 8686

General enquiries

Call us on: +6 (03) 8924 8000 + 60 (7) 268-6200
Fax:
+60 (7) 228-6202
Phone:+601-4228-8055
'''

# Fetch the page through cloudscraper and search its full HTML for numbers.
scraper = cloudscraper.CloudScraper(browser="chrome")
#url = "https://www.softwaretestinghelp.com/email-service-providers/amp/"
url = "https://aaa.edu.au"
r = scraper.get(url)
soup = BeautifulSoup(r.text, "html.parser")
# str(soup) is the parsed document serialised back to markup, so the search
# covers tag attributes and scripts as well as visible text.
Source = str(soup)
print(getPhone(Source))