Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from extract_emails import ExtractEmails
- from email.utils import parseaddr
- import re
- import cloudscraper
- from bs4 import BeautifulSoup
def getEmail(source):
    """Parse ``source`` as one address string and return its e-mail parts.

    ``email.utils.parseaddr`` splits ``source`` into a ``(realname, address)``
    pair; every element of that pair that contains ``"@"`` is kept.

    Args:
        source: Text holding a single address, e.g.
            ``"Name <user@example.com>"``.

    Returns:
        list[str]: The elements of the parsed pair containing ``"@"``; empty
        when neither element does. The list is also printed.

    NOTE(review): a later ``def getEmail(url)`` in this file rebinds the same
    name, so this version is unreachable after import unless one is renamed.
    """
    parsed_pair = parseaddr(source)
    e_mails = [part for part in parsed_pair if "@" in part]
    print(e_mails)
    # The result used to be printed only (implicit ``None`` return), which
    # made it unusable by callers; return it as well.
    return e_mails
def getEmail(url):
    """Collect the e-mail addresses ``ExtractEmails`` finds for ``url``.

    Args:
        url: Address of the site to hand to ``ExtractEmails``.

    Returns:
        The ``emails`` attribute of the ``ExtractEmails`` instance built for
        ``url``.
    """
    # Use the caller's URL: the previous body ignored ``url`` and always
    # crawled one hard-coded site.
    # NOTE(review): depth=7 is kept from the original; presumably the crawl
    # depth -- confirm against the extract_emails documentation.
    em = ExtractEmails(url, depth=7)
    return em.emails
def getPhone(source):
    """Return the phone-number-like substrings of ``source`` that pass the filter.

    Candidates are taken from ``source`` with a regular expression. A
    candidate is kept when it starts with one of the listed prefixes and,
    once its spaces are removed, is at most 13 characters long and entirely
    numeric. Candidates containing ``+``, ``(``, ``)``, ``.`` or ``-``
    therefore never pass the numeric check.

    Args:
        source: Text (for example page HTML) to search.

    Returns:
        list[str]: Accepted candidates exactly as matched, grouped in prefix
        order and, within one prefix, in order of appearance. Repeated
        matches are not de-duplicated.
    """
    # NOTE(review): the prefixes look like Australian dialling codes -- confirm.
    prefixes = ["1300", "1800",
                "02", "03", "04", "05", "07", "08",
                "612", "613", "614", "615", "617", "618"]
    candidates = re.findall(r'[\+\(]?[0-9][0-9 .\-\(\)]{8,}[0-9]', source)

    def _is_plain_number(candidate):
        # Same normalisation as before: drop spaces, then require digits only.
        compact = candidate.strip().replace(" ", "").strip()
        return len(compact) <= 13 and compact.isnumeric()

    # The length/digit test does not depend on the prefix, so apply it once.
    plain_numbers = [c for c in candidates if _is_plain_number(c)]

    phones_array = []
    for prefix in prefixes:
        phones_array.extend(c for c in plain_numbers if c.startswith(prefix))
    return phones_array
- #em = ExtractEmails("https://carealestate.herokuapp.com", depth=1)
- #print(em.emails)
- '''
- Source = <p><strong>Kuala Lumpur</strong><strong>:</strong> +60 (0)3 2723 7900</p>
- <p><strong>Mutiara Damansara:</strong> +60 (0)3 2723 7900</p>
- <p><strong>Penang:</strong> + 60 (0)4 255 9000</p>
- <h2>Where we are </h2>
- <strong> Call us on:</strong> +6 (03) 8924 8686
- </p></div><div class="sys_two">
- <h3 class="parentSchool">General enquiries</h3><p style="FONT-SIZE: 11px">
- <strong> Call us on:</strong> +6 (03) 8924 8000
- + 60 (7) 268-6200 <br />
- Fax:<br />
- +60 (7) 228-6202<br />
- Phone:</strong><strong style="color: #f00">+601-4228-8055</strong>
- '''
# Target page to scan for phone numbers.
# Alternative target kept from the original:
# url = "https://www.softwaretestinghelp.com/email-service-providers/amp/"
url = "https://aaa.edu.au"

# Fetch the page through a cloudscraper session created with browser="chrome".
scraper = cloudscraper.CloudScraper(browser="chrome")
response = scraper.get(url)

# Parse the response text with BeautifulSoup and serialise it back to a
# string; that string is what getPhone searches.
Source = str(BeautifulSoup(response.text, "html.parser"))
print(getPhone(Source))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement