Advertisement
Guest User

Untitled

a guest
Jun 24th, 2017
59
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.40 KB | None | 0 0
import random
import time
from datetime import datetime
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup
#%pylab inline
  6.  
  7.  
  8. global listingCount
  9. listingCount = 0
  10.  
  11. print('Please Enter your phone number including the country code:')
  12. userCellphone = input()
  13. myCellPhone = userCellphone
  14.  
  15. print('What Would you like to search for?')
  16. mySearch = input()
  17.  
  18. print('Highest Price?')
  19. highPrice = input()
  20. highPrice = int(highPrice)
  21.  
  22. print('Lowest Price?')
  23. lowPrice = input()
  24. lowPrice = int(lowPrice)
  25.  
  26. URL = 'https://vancouver.craigslist.ca/search/sss?query=' + mySearch
  27. BASE = 'https://vancouver.craigslist.ca/'
  28.  
  29. print('Searching for a ' + mySearch + ' between the prices of $' + str(lowPrice) + ' and $' + str(highPrice) + ' using this url: ')
  30. #print(URL)
  31.  
  32. response = requests.get(URL)
  33. print(response.url)
  34.  
  35. html = response.content
  36.  
  37. soup = BeautifulSoup(response.text,"html.parser")
  38. #soup2 = BeautifulSoup(html,'html5lib') /*Do not use - Reference only*/
  39.  
  40. #(response.content or response.text?) /* Research this Why dump as payload (content), and not as text ? */
  41. #print(soup.prettify()[:1000]) /* Prints out HTML 1000 characters */
  42.  
  43. listing = soup.find_all('li', attrs={'class': 'result-row'})
  44. print('\n')
  45. print('There are ' + str(len(listing)) + ' total listings for ' + mySearch + ' with your keyword(s).')
  46. print('\n')
  47. #print('Which listing would you like to see? Enter a number between 1-' + str(len(listing)) + '.')
  48. #specified_listing = input()
  49. #specified_listing = int(specified_listing)
  50. #print('\n')
  51. #test_listing = listing[specified_listing]
  52. #print(test_listing.prettify())
  53. #cost = test_listing.find_all(attrs={'class': 'result-price'})[0].text
  54. #this_time = listing.find_all('time',attrs={'class':'result-date'})['datetime']
  55. #this_time = pd.to_datetime(this_time)
  56. #this_price = float(test_listing.find('span', attrs={'class':'result-price'}).text.strip('$'))
  57. #this_title = test_listing.find('a', attrs={'class': 'hdrlnk'}).text
  58. #print('\n')
  59. #print('\n')
  60. #print(str(cost) + ' - ' + str(this_title) + ' - ' + str(this_time))
  61. #print('\n')
  62. #print('\n'.join([str(i) for i in [cost, this_title, this_time]]))
  63. #print('There are ' + str(len(price)) + 'listings that fit your criteria.')
  64. print('\n')
  65.  
  66. for listing in soup.find_all('li', attrs={'class':'result-row'}):
  67. listingCount += 1
  68. if listing.find_all('span',attrs={'class':'result-price'}) != None:
  69. price = listing.find('span', {'class':'result-price'})
  70. if price != None:
  71. price = int(price.text[1:])
  72. #if price:
  73. #print(price.text)
  74. #price = int(price.text[1:])
  75. if price <= highPrice and price > lowPrice:
  76. print(str(listingCount) + ". Oh yes! This must be shown to the user since it costs: ")
  77. print( '$'+ str(price))
  78. # price = int(price.text[1:])
  79. # if listing.find_all('span',attrs={'class':'result-price'}) != None:
  80. # price = listing.text[2:6]
  81. # price = int(price)
  82. # print(listing.text[2:6])
  83. # price = int(price.lstrip('$')) #removes this element '\n$'
  84. # price = int(price[2:]) #manually removes first two characters
  85. # price = int(price.split('$')[-1]) #splits on $ and keeps the tail.
  86. #if price <= highPrice and price > lowPrice:
  87. #print("Oh yes! This must be shown to the user since it costs: " + str(price))
  88. # Now we get the name of the thing
  89. name = listing.find('a', {'class':'result-title'})
  90. #if name:
  91. if price <= highPrice and price > lowPrice:
  92. print(name.text)
  93. time = listing.find('time',{'class':'result-date'})['title']
  94. #if time:
  95. if price <= highPrice and price > lowPrice:
  96. print(time)
  97. # print (listing.text)
  98. link_end = listing.a['href']
  99. url = urljoin(BASE, link_end)
  100. print (url)
  101. print ("\n")
  102.  
  103. ############## BEGIN SAVING THE DATA ###############
  104. # import time module
  105. from datetime import datetime
  106. # This will remove weird characters that people put in titles like ****!***!!!
  107. #use_chars = str(ascii_letters) + ''.join([str(i) for i in range(10)]) + ' '
  108.  
  109. #Storage containers
  110.  
  111. link_list = [] #We'll store the data here
  112. link_list_send = [] #This is a list of links to be sent.
  113. send_list = [] #This is what will actually be sent in the text
  114.  
  115. while True:
  116. #resp = requests.get(url)
  117. #txt = bs4(resp.text, 'html.parser')
  118.  
  119. for listing in soup:
  120. if url not in link_list and url not in link_list_send:
  121. print('Found new listing')
  122. print( '$'+ str(price))
  123. print(name.text)
  124. print(time)
  125. print(url)
  126. link_list_send.append(url)
  127. send_list.append(name + ' - ' + BASE+link_end)
  128. if len(link_list_send) > 0:
  129. print('Sending a SMS alert to: ' + userCellphone + 'at ' + str(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
  130. message = '/n'.join(send_list)
  131. link_list += link_list_send
  132. link_list_send = []
  133. send_list = []
  134.  
  135. sleep_amt = np.random.randint(60, 120)
  136. time.sleep(sleep_amt)
  137.  
  138. ############## BEGIN SMS PORTION ###################
  139.  
  140. from twilio.rest import Client
  141. import CraigslistTwilioText
  142.  
  143. CraigslistTwilioText.SendText(message, userCellphone)
  144.  
  145. print('\n')
  146. print('Text alert sent to' + str(userCellphone) + 'successfully.')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement