SHARE
TWEET

Untitled

a guest Oct 23rd, 2019 89 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import bonobo
  2. import requests
  3. from bs4 import BeautifulSoup
  4. def scrape_zillow():
  5.     price = ''
  6.     status = ''
  7.     url = 'https://www.zillow.com/dallas-pa/?searchQueryState={%22pagination%22:{},%22mapBounds%22:{%22west%22:-76.33695602416992,%22east%22:-75.76223373413086,%22south%22:41.2735491004642,%22north%22:41.39780137597823},%22regionSelection%22:[{%22regionId%22:44845,%22regionType%22:6}],%22isMapVisible%22:true,%22mapZoom%22:11,%22filterState%22:{%22isForRent%22:{%22value%22:true}},%22isListVisible%22:true}'
  8.     r = requests.get(url, headers=headers)
  9.     if r.status_code == 200:
  10.         html = r.text.strip()
  11.         soup = BeautifulSoup(html, 'lxml')
  12.         price_status_section = soup.select('.list-card-price')
  13.         if len(price_status_section) > 1:
  14.             price = price_status_section[1].text.strip()
  15.     return price
  16. def scrape_redfin():
  17.     price = ''
  18.     status = ''
  19.     url = 'https://www.redfin.com/TX/Dallas/2619-Colby-St-75204/unit-B/home/32251730'
  20.     r = requests.get(url, headers=headers)
  21.     if r.status_code == 200:
  22.         html = r.text.strip()
  23.         soup = BeautifulSoup(html, 'lxml')
  24.         price_section = soup.select('.value.font-size-large')
  25.         if price_section:
  26.             price = price_section[0].text.strip()
  27.     return price
  28. def extract():
  29.     yield scrape_zillow()
  30.     yield scrape_redfin()
  31. def transform(price: str):
  32.     t_price = price.replace(',', '').lstrip('$')
  33.     return float(t_price)
  34. def load(price: float):
  35.     with open('pricing.txt', 'a+', encoding='utf8') as f:
  36.         f.write((str(price) + '\n'))
  37. if __name__ == '__main__':
  38.     headers = {
  39.         'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
  40.         'referrer': 'https://google.com'
  41.     }
  42.     # scrape_redfin()
  43.     graph = bonobo.Graph(
  44.         extract,
  45.         transform,
  46.         load,
  47.     )
  48.     bonobo.run(graph)
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
Not a member of Pastebin yet?
Sign Up, it unlocks many cool features!
 
Top