Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # scrape.py
- # Lets the script run operating-system commands. Here it is used to launch an AppleScript through `osascript`, so the script has to run on macOS (OS X).
- import os
- # Apache2 Licensed HTTP library that lets us access all the HTTP verbs. This is useful because it does things like automatically adding query strings to URLs and adding parameters.
- import requests
- # Pull information from the HTML page we want to scrape.
- from bs4 import BeautifulSoup
- # Run the script at an interval of, say, every 24 hours for daily updates
- from apscheduler.schedulers.blocking import BlockingScheduler
- # Lets us access the current time and date during script execution.
- from datetime import datetime
# Create the scheduler instance used to run the scraping job periodically.
# The class has to be called here: binding the bare class to `sch` would make
# the later `sch.add_job(...)` and `sch.start()` calls fail, because they are
# instance methods and no scheduler object would exist.
sch = BlockingScheduler()
def main():
    """Check Indeed for junior web-developer postings and trigger an alert.

    Downloads the Indeed search results for "web developer" in Denver, CO,
    collects every ``<div class="title">`` element, and, if any of those
    titles contains "Junior" or "Jr" (case-sensitive), runs the
    ``sendMessage.scpt`` AppleScript once through ``osascript``.

    NOTE(review): ``sendMessage.scpt`` is not part of this file; presumably it
    sends the text 'Check website' to the given phone number -- confirm.

    Returns:
        None.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    # Go to indeed.com and search for "web developer", with Denver as filter.
    url = 'https://www.indeed.com/jobs?q=web%20developer&l=Denver%2C%20CO&vjk=0c0f7c56b3d79b4c'

    # Without a timeout a stalled connection would block the (blocking)
    # scheduler forever, silently stopping all future checks.
    response = requests.get(url, timeout=10)

    # Parse the raw response body so the job-title elements can be queried.
    soup = BeautifulSoup(response.content, features="html.parser")

    # Job titles were found (via "inspect element") to live in <div> elements
    # whose class is 'title'.
    matches = soup.find_all(name='div', attrs={'class': 'title'})

    # A single alert per run is enough, no matter how many titles match.
    if any("Junior" in job.text or "Jr" in job.text for job in matches):
        os.system("osascript sendMessage.scpt 4694756018 'Check website' ")
# Poll once a day: job listings change slowly, and a 3-second interval would
# hammer the site and could fire the alert every few seconds while a matching
# posting is listed.
sch.add_job(main, 'interval', hours=24)
print('Press Ctrl+{0} to exit'.format('Break' if os.name == 'nt' else 'C'))
try:
    # Blocks here, running the scheduled job until the scheduler is stopped.
    sch.start()
except (KeyboardInterrupt, SystemExit):
    # Ctrl+C / interpreter exit is the expected way to stop; not an error.
    pass
# Runs one last check after the scheduler has stopped.
main()
Advertisement
Add Comment
Please, Sign In to add comment