Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
from urllib.parse import urljoin

import pandas as pd
import requests
#%pylab inline
from bs4 import BeautifulSoup

# Number of result rows walked so far; the listing loop further down
# increments it and uses it to number the rows it prints.
listingCount = 0

# ---------------------------------------------------------------------------
# Collect the search parameters from the user.
# ---------------------------------------------------------------------------
print('Please Enter your phone number including the country code:')
userCellphone = input()
myCellPhone = userCellphone
print('What Would you like to search for?')
mySearch = input()
print('Highest Price?')
highPrice = int(input())
print('Lowest Price?')
lowPrice = int(input())

# ---------------------------------------------------------------------------
# Run the search.
# ---------------------------------------------------------------------------
BASE = 'https://vancouver.craigslist.ca/'
SEARCH_ENDPOINT = urljoin(BASE, 'search/sss')
print('Searching for a ' + mySearch + ' between the prices of $' + str(lowPrice) + ' and $' + str(highPrice) + ' using this url: ')

# Hand the keyword to requests as a query parameter instead of gluing it onto
# the URL by hand: requests percent-encodes it, so spaces, '&', '#', etc. in
# the search term can no longer corrupt the query string.
response = requests.get(SEARCH_ENDPOINT, params={'query': mySearch}, timeout=30)
# Stop on an HTTP error rather than parsing an error page and reporting
# "0 listings".
response.raise_for_status()

# The fully encoded search URL that was actually requested.
URL = response.url
print(response.url)

# response.content is the raw body as bytes; response.text is the same body
# decoded to str. The parser below is fed the decoded text.
html = response.content
soup = BeautifulSoup(response.text, "html.parser")

# Every search hit is rendered as an <li class="result-row"> element.
listing = soup.find_all('li', attrs={'class': 'result-row'})
print('\n')
print('There are ' + str(len(listing)) + ' total listings for ' + mySearch + ' with your keyword(s).')
print('\n')
print('\n')
def _parse_price(price_text):
    """Turn a price label such as '$1,200' into an int.

    Returns None when the label does not contain a plain whole-number amount,
    so the caller can skip the row instead of crashing on int().
    """
    digits = price_text.strip().lstrip('$').replace(',', '')
    return int(digits) if digits.isdigit() else None


# Walk every result row and print the ones priced in (lowPrice, highPrice].
# listingCount numbers rows by their position in the full result list, not by
# their position among the matches.
for row in soup.find_all('li', attrs={'class': 'result-row'}):
    listingCount += 1

    # find() returns None when the row has no price tag; such rows cannot be
    # compared against the price range, so skip them.
    price_tag = row.find('span', {'class': 'result-price'})
    if price_tag is None:
        continue

    price = _parse_price(price_tag.text)
    # Same bounds as before: the upper limit is inclusive, the lower is not.
    if price is None or not (lowPrice < price <= highPrice):
        continue

    print(str(listingCount) + ". Oh yes! This must be shown to the user since it costs: ")
    print('$' + str(price))

    # Title of the listing.
    name = row.find('a', {'class': 'result-title'})
    if name is not None:
        print(name.text)

    # Human-readable posting time, taken from the <time> tag's title attribute.
    # NOTE: this module-level name `time` holds a string; it would shadow the
    # stdlib `time` module if that were imported under its plain name.
    time_tag = row.find('time', {'class': 'result-date'})
    time = time_tag.get('title', '') if time_tag is not None else ''
    print(time)

    # The first anchor in the row carries the (possibly relative) link to the
    # listing page; resolve it against the site root.
    if row.a is not None:
        link_end = row.a['href']
        url = urljoin(BASE, link_end)
        print(url)
    print("\n")
############## BEGIN SAVING THE DATA ###############
import random
from datetime import datetime
# Imported by name on purpose: earlier in this script the module-level name
# `time` is bound to a listing's timestamp string, so `time.sleep` would fail.
from time import sleep

############## BEGIN SMS PORTION ###################
from twilio.rest import Client
import CraigslistTwilioText

# Storage containers
link_list = []       # URLs that have already been texted to the user
link_list_send = []  # URLs discovered during the current poll, not yet sent
send_list = []       # "title - url" lines that make up the next text message


def _matching_rows(page):
    """Yield (title, price, posted, url) for rows priced in (lowPrice, highPrice].

    `page` is a parsed search-results document. Rows without a price, a title
    or a link, or whose price is not a plain whole number, are skipped.
    """
    for row in page.find_all('li', attrs={'class': 'result-row'}):
        price_tag = row.find('span', {'class': 'result-price'})
        title_tag = row.find('a', {'class': 'result-title'})
        if price_tag is None or title_tag is None or row.a is None:
            continue
        # Accept labels like '$1,200' as well as '$40'.
        digits = price_tag.text.strip().lstrip('$').replace(',', '')
        if not digits.isdigit():
            continue
        amount = int(digits)
        if not (lowPrice < amount <= highPrice):
            continue
        time_tag = row.find('time', {'class': 'result-date'})
        posted = time_tag.get('title', '') if time_tag is not None else ''
        yield title_tag.text, amount, posted, urljoin(BASE, row.a['href'])


# The first pass reuses the page that was already downloaded; later passes
# re-fetch the same search so listings posted in the meantime are picked up.
page = soup
while True:
    if page is not None:
        for title, amount, posted, listing_url in _matching_rows(page):
            # Only report a listing the first time it is seen.
            if listing_url in link_list or listing_url in link_list_send:
                continue
            print('Found new listing')
            print('$' + str(amount))
            print(title)
            print(posted)
            print(listing_url)
            link_list_send.append(listing_url)
            send_list.append(title + ' - ' + listing_url)

        if link_list_send:
            print('Sending a SMS alert to: ' + userCellphone + ' at ' + datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
            message = '\n'.join(send_list)
            CraigslistTwilioText.SendText(message, userCellphone)
            print('\n')
            print('Text alert sent to ' + str(userCellphone) + ' successfully.')
            # Remember what was sent so it is not texted again, then reset
            # the per-poll buffers.
            link_list += link_list_send
            link_list_send = []
            send_list = []

    # Wait a random 60-119 seconds between polls (upper bound exclusive).
    sleep_amt = random.randrange(60, 120)
    sleep(sleep_amt)

    try:
        poll = requests.get(response.url, timeout=30)
        poll.raise_for_status()
        page = BeautifulSoup(poll.text, 'html.parser')
    except requests.RequestException as err:
        # A transient network or HTTP failure should not end the watcher;
        # skip this round and try again after the next sleep.
        print('Could not refresh the search results: ' + str(err))
        page = None
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement