Advertisement
Guest User

Untitled

a guest
Oct 29th, 2017
153
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.08 KB | None | 0 0
  1. from bs4 import BeautifulSoup
  2. import requests
  3. import requests.exceptions
  4. from urllib.parse import urlsplit
  5. from collections import deque
  6. import re
  7. import smtplib
  8.  
  9. from selenium import webdriver
  10. from selenium.common.exceptions import NoSuchElementException
  11. from email.mime.multipart import MIMEMultipart
  12. from email.mime.text import MIMEText
  13.  
  14. browser = webdriver.Chrome('C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe')
  15. browser.get('https://www.yellowpages.com.au/search/listings?clue=restaurants&eventType=pagination&locationClue=Mackay+Region%2C+QLD&pageNumber=2&referredBy=UNKNOWN')
  16.  
  17. call_to_action_divs = browser.find_elements_by_class_name('call-to-action-group')
  18. heading_divs = browser.find_elements_by_class_name('listing-summary')
  19.  
  20. # Create lists
  21. websites = []
  22. emails = []
  23. headings = []
  24. next_button_exists = True
  25.  
  26. ################# DETERMINE FUNCTIONS #####################
  27.  
  28. # Get Email And Website
  29. def get_contact_info(contact_class, div, attribute='href'):
  30.     try:
  31.         return div.find_element_by_class_name(contact_class).get_attribute(attribute)
  32.     except:
  33.         return None
  34.  
  35. # Get Heading
  36. def get_heading(contact_class, div):
  37.     try:
  38.         return div.find_element_by_class_name(contact_class).text
  39.     except:
  40.         return None
  41.  
  42. def next_page():
  43.     element = browser.find_element_by_css_selector('div.button-pagination-container a.navigation').get_attribute('href')
  44.     browser.get(element)
  45.  
  46. def check_button_exists(contact_class, attribute='href'):
  47.     try:
  48.         browser.find_element_by_css_selector(contact_class).get_attribute(attribute)
  49.     except NoSuchElementException:
  50.         return False
  51.     return True
  52.  
  53. def send_mail(self):
  54.     gmailUser = 'myemail@gmail.com'
  55.     gmailPassword = 'P@ssw0rd'
  56.     recipient = 'sendto@gmail.com'
  57.     message='your message here '
  58.  
  59.     msg = MIMEMultipart()
  60.     msg['From'] = gmailUser
  61.     msg['To'] = recipient
  62.     msg['Subject'] = "Subject of the email"
  63.     msg.attach(MIMEText(message))
  64.  
  65.     mailServer = smtplib.SMTP('smtp.gmail.com', 587)
  66.     mailServer.ehlo()
  67.     mailServer.starttls()
  68.     mailServer.ehlo()
  69.     mailServer.login(gmailUser, gmailPassword)
  70.     mailServer.sendmail(gmailUser, recipient, msg.as_string())
  71.     mailServer.close()
  72.  
  73. ################# SCRAPE #####################
  74.  
  75. while next_button_exists:
  76.     for div in call_to_action_divs:
  77.         # Get websites
  78.         website = get_contact_info('contact-url', div, attribute='href')
  79.         if website:
  80.             websites.append(website)
  81.  
  82.         # Get emails
  83.         email = get_contact_info('contact-email', div, attribute='data-email')
  84.         if email:
  85.             emails.append(email)
  86.  
  87.     for div in heading_divs:
  88.         heading = get_heading('listing-name', div)
  89.         if heading:
  90.             headings.append(heading)
  91.  
  92.     if check_button_exists('div.button-pagination-container a.navigation', attribute='href'):
  93.         print('True')
  94.         next_page()
  95.  
  96.     else:
  97.         next_button_exists = False
  98.         # change search category (browser.get)
  99.         print(next_button_exists)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement