Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from bs4 import BeautifulSoup
- import requests
- import requests.exceptions
- from urllib.parse import urlsplit
- from collections import deque
- import re
- import smtplib
- from selenium import webdriver
- from selenium.common.exceptions import NoSuchElementException
- from email.mime.multipart import MIMEMultipart
- from email.mime.text import MIMEText
# Start Chrome through the local chromedriver binary. The path is a raw
# string so that its backslashes are never interpreted as escape sequences.
browser = webdriver.Chrome(r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe')
# Open page 2 of the "restaurants" listing search for Mackay Region, QLD.
browser.get('https://www.yellowpages.com.au/search/listings?clue=restaurants&eventType=pagination&locationClue=Mackay+Region%2C+QLD&pageNumber=2&referredBy=UNKNOWN')
# Listing elements of the page that is loaded right now.
call_to_action_divs = browser.find_elements_by_class_name('call-to-action-group')
heading_divs = browser.find_elements_by_class_name('listing-summary')

# Accumulators filled by the scrape loop.
websites = []
emails = []
headings = []
# Loop flag: cleared once no pagination link is found on the current page.
next_button_exists = True
- ################# DEFINE FUNCTIONS #####################
- # Get Email And Website
def get_contact_info(contact_class, div, attribute='href'):
    """Return an attribute of the first child of *div* with class *contact_class*.

    Args:
        contact_class: Class name to look up inside ``div``.
        div: Element to search; it must provide ``find_element_by_class_name``.
        attribute: Name of the attribute to read from the matched child
            (``'href'`` by default).

    Returns:
        Whatever ``get_attribute`` returns for the matched child, or ``None``
        when the lookup or the attribute read raises.
    """
    try:
        return div.find_element_by_class_name(contact_class).get_attribute(attribute)
    except Exception:
        # Deliberately best-effort: a listing without this contact detail is
        # simply skipped. ``Exception`` (instead of a bare ``except``) lets
        # KeyboardInterrupt and SystemExit propagate so the run can be stopped.
        return None
- # Get Heading
def get_heading(contact_class, div):
    """Return the text of the first child of *div* with class *contact_class*.

    Args:
        contact_class: Class name to look up inside ``div``.
        div: Element to search; it must provide ``find_element_by_class_name``.

    Returns:
        The ``text`` of the matched child, or ``None`` when the lookup raises.
    """
    try:
        return div.find_element_by_class_name(contact_class).text
    except Exception:
        # Deliberately best-effort: a listing without a heading is skipped.
        # ``Exception`` (instead of a bare ``except``) lets KeyboardInterrupt
        # and SystemExit propagate so the run can be stopped.
        return None
def next_page():
    """Navigate the shared ``browser`` to the URL of the pagination link.

    The link is the first element matching
    ``div.button-pagination-container a.navigation``; its ``href`` is loaded
    with ``browser.get``. Any exception raised by the lookup propagates.
    """
    pagination_link = browser.find_element_by_css_selector(
        'div.button-pagination-container a.navigation'
    )
    target_url = pagination_link.get_attribute('href')
    browser.get(target_url)
def check_button_exists(contact_class, attribute='href'):
    """Report whether the current page has an element matching a CSS selector.

    Args:
        contact_class: CSS selector passed to
            ``browser.find_element_by_css_selector``.
        attribute: Attribute read from the match; its value is discarded.

    Returns:
        ``False`` when the lookup raises ``NoSuchElementException``,
        ``True`` otherwise.
    """
    try:
        button = browser.find_element_by_css_selector(contact_class)
        button.get_attribute(attribute)
    except NoSuchElementException:
        return False
    else:
        return True
def send_mail(self):
    """Send a fixed plain-text e-mail through Gmail's SMTP server.

    Connects to ``smtp.gmail.com`` on port 587, upgrades the connection with
    STARTTLS, logs in and sends one message from the sender to the recipient.

    Args:
        self: Unused; kept so existing callers keep working.

    Raises:
        Nothing is caught here, so any error raised by ``smtplib`` while
        connecting, logging in or sending propagates to the caller.
    """
    # NOTE(review): credentials are hard-coded in the source. Load them from
    # the environment or a secrets store before using real values.
    gmail_user = 'myemail@gmail.com'
    gmail_password = 'P@ssw0rd'
    recipient = 'sendto@gmail.com'
    message = 'your message here '

    msg = MIMEMultipart()
    msg['From'] = gmail_user
    msg['To'] = recipient
    msg['Subject'] = "Subject of the email"
    msg.attach(MIMEText(message))

    # The context manager sends QUIT and closes the socket even when one of
    # the steps below raises, so the connection is never leaked.
    with smtplib.SMTP('smtp.gmail.com', 587) as mail_server:
        mail_server.ehlo()
        mail_server.starttls()
        # Identify again: the session is reset once TLS is negotiated.
        mail_server.ehlo()
        mail_server.login(gmail_user, gmail_password)
        mail_server.sendmail(gmail_user, recipient, msg.as_string())
- ################# SCRAPE #####################
# Walk the result pages, collecting websites, e-mail addresses and headings,
# until a page has no pagination link.
while next_button_exists:
    # Look the listing elements up again on every pass: next_page() loads a
    # new document, so elements found on the previous page can no longer be
    # used and would silently yield nothing.
    call_to_action_divs = browser.find_elements_by_class_name('call-to-action-group')
    heading_divs = browser.find_elements_by_class_name('listing-summary')

    for div in call_to_action_divs:
        # Get websites
        website = get_contact_info('contact-url', div, attribute='href')
        if website:
            websites.append(website)
        # Get emails
        email = get_contact_info('contact-email', div, attribute='data-email')
        if email:
            emails.append(email)

    for div in heading_divs:
        heading = get_heading('listing-name', div)
        if heading:
            headings.append(heading)

    if check_button_exists('div.button-pagination-container a.navigation', attribute='href'):
        print('True')
        next_page()
    else:
        next_button_exists = False
        # change search category (browser.get)
        print(next_button_exists)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement