Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/python3
"""Email a digest of new Craigslist bicycle posts.

Opens a Craigslist search URL, follows every result link whose post ID
has not been recorded yet, and mails the title, the last line of the
posting body and the link for each new post through the SMTP server on
localhost. Seen post IDs are kept in ``post_list.txt`` inside
``~/craigslist-scraper/`` so later runs only report new posts.
"""
# Import libraries
import os
import smtplib
from os.path import expanduser

from mechanicalsoup import StatefulBrowser

# Directory and file holding the IDs of posts that were already reported
directory = expanduser("~")+'/craigslist-scraper/'
post_list_path = os.path.join(directory, 'post_list.txt')

# Set message data
# Set email address (the digest is sent from and to the same address)
name = 'Your Name'
sender = 'your@email.address'
receivers = [sender]
sender_full = 'Craigslistscraper <'+sender+'>'
receiver_full = name+' <'+sender+'>'
subject = 'New posts'

# Set the header lines that start the message
message = """From: {}
To: {}
Subject: {}""".format(sender_full, receiver_full, subject)

# Try to read in list of post IDs
try:
    with open(post_list_path, encoding='utf-8') as post_file:
        # Drop blank lines so an empty file does not yield a bogus '' ID
        post_list = [line for line in post_file.read().split('\n') if line]
# If the file does not exist yet, start with an empty list
except FileNotFoundError:
    post_list = []
# Set view of the IDs for fast membership tests; post_list keeps the order
seen_ids = set(post_list)

# URL for search for bikes within 7.6 miles of 10005
# with a maximum price of $100
url = 'https://newyork.craigslist.org/d/bicycles/search/bia?postal=10005&search_distance=7.6&max_price=100'

# Create browser objects (one for the search page, one for the posts)
br = StatefulBrowser()
br_post = StatefulBrowser()

# Open search URL and get page
br.open(url)
soup = br.get_current_page()

# Loop through posts and collect an entry for every new post
new_posts = 0
entries = []
for link in soup.find_all('a'):
    href = link.get('href')
    # Anchors without an href give None; only links to bike posts matter
    if not href or 'bik/d' not in href:
        continue
    # The post ID is the file name of the link without its extension
    post_id = href.split('/')[-1].split('.')[0]
    if post_id in seen_ids:
        continue
    new_posts += 1
    br_post.open(href)
    post_soup = br_post.get_current_page()
    # Fall back to empty text when a page lacks a title or posting body
    # instead of aborting the whole run
    title_tag = post_soup.title
    title = title_tag.text if title_tag is not None else ''
    body_tag = post_soup.find(id='postingbody')
    # Only the text after the last newline of the posting body is kept
    post = body_tag.text.split('\n')[-1] if body_tag is not None else ''
    entries.append("\n\n\n{}\n\n{}\n\n{}".format(title, post, href))
    post_list.append(post_id)
    seen_ids.add(post_id)
message = message+''.join(entries)

# Send message if there are new posts
if new_posts > 0:
    # Connect only when there is something to send; the with-block
    # closes the connection afterwards
    with smtplib.SMTP('localhost') as smtp_obj:
        smtp_obj.sendmail(sender, receivers, message.encode('utf-8'))
    print('{} new posts sent'.format(new_posts))
    # Write updated list of posts to file, keeping the 200 newest IDs.
    # Create the directory first so a missing folder cannot make the
    # write fail after the mail has already gone out.
    os.makedirs(directory, exist_ok=True)
    with open(post_list_path, 'w', encoding='utf-8') as post_file:
        post_file.write('\n'.join(post_list[-200:]))
else:
    print('No new posts')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement