defan752

Untitled

Aug 25th, 2019
135
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.08 KB | None | 0 0
  1. # scrape.py
  2.  
  3. # This gives our Python script the ability to interact with the operating system it is being run on — because the script is AppleScript, it will have to interact with OS X.
  4. import os
  5. # Apache2 Licensed HTTP library that lets us access all the HTTP verbs. This is useful because it does things like automatically adding query strings to URLs and adding parameters.
  6. import requests
  7. # Pull information from the HTML page we want to scrape.
  8. from bs4 import BeautifulSoup
  9. # Run the script at an interval of, say, every 24 hours for daily updates
  10. from apscheduler.schedulers.blocking import BlockingScheduler
  11. # Lets us access the current time and date during script execution.
  12. from datetime import datetime
  13.  
  14. # Lets us access the apscheduler module through the sch variable.
  15. sch = BlockingScheduler
  16.  
  17. # Set page we are scraping, the response from the get request to URL, and HTML of page coming in through the response.
  18. def main():
  19.     # Go to indeed.com and search for "web developer", with Denver as filter.
  20.     url = 'https://www.indeed.com/jobs?q=web%20developer&l=Denver%2C%20CO&vjk=0c0f7c56b3d79b4c'
  21.     response = requests.get(url)
  22.     html = response.content
  23.  
  24.     # Pass HTML through BeautifulSoup to sort through and pick out data we need in format readable by Python, then store in soup variable
  25.     soup = BeautifulSoup(html, features="html.parser")
  26.     # By inspect element, determine that class of job title posting is 'title'. Search through HTML with findAll method to find all HTML elements with name='div' and element attribute of class with value of 'title'.
  27.     matches = soup.findAll(name='div', attrs={'class': 'title'})
  28.  
  29.     for jobTitle in matches:
  30.         if "Junior" in jobTitle.text:
  31.             os.system("osascript sendMessage.scpt 4694756018 'Check website' ")
  32.             break
  33.  
  34.         elif "Jr" in jobTitle.text:
  35.             os.system("osascript sendMessage.scpt 4694756018 'Check website' ")
  36.             break
  37.  
  38.     return;
  39.  
  40.     sch.add_job(main, 'interval', seconds = 3)
  41.  
  42.     print('Press Ctrl+{0} to exit'.format('Break' if os.name == 'nt' else 'C'))
  43.  
  44.     try:
  45.         sch.start()
  46.     except (KeyboardInterrupt, SystemExit):
  47.         pass
  48. main()
Advertisement
Add Comment
Please, Sign In to add comment