Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import bonobo
- import requests
- from bs4 import BeautifulSoup
def scrape_zillow():
    """Fetch a Zillow rental search page and return one listing's price text.

    The request is sent with the module-level ``headers`` dict.

    Returns:
        The stripped text of the second ``.list-card-price`` element on the
        page, or ``''`` when the response status is not 200 or the page has
        fewer than two such elements.

    Raises:
        requests.RequestException: if the request fails or times out.
    """
    url = 'https://www.zillow.com/dallas-pa/?searchQueryState={%22pagination%22:{},%22mapBounds%22:{%22west%22:-76.33695602416992,%22east%22:-75.76223373413086,%22south%22:41.2735491004642,%22north%22:41.39780137597823},%22regionSelection%22:[{%22regionId%22:44845,%22regionType%22:6}],%22isMapVisible%22:true,%22mapZoom%22:11,%22filterState%22:{%22isForRent%22:{%22value%22:true}},%22isListVisible%22:true}'
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(url, headers=headers, timeout=10)
    if response.status_code != 200:
        return ''

    soup = BeautifulSoup(response.text.strip(), 'lxml')
    price_cards = soup.select('.list-card-price')
    # The second match (index 1) is used on purpose, mirroring the original
    # selection logic; a page with fewer than two matches yields no price.
    if len(price_cards) > 1:
        return price_cards[1].text.strip()
    return ''
def scrape_redfin():
    """Fetch a single Redfin listing page and return its price text.

    The request is sent with the module-level ``headers`` dict.

    Returns:
        The stripped text of the first ``.value.font-size-large`` element on
        the page, or ``''`` when the response status is not 200 or no such
        element exists.

    Raises:
        requests.RequestException: if the request fails or times out.
    """
    url = 'https://www.redfin.com/TX/Dallas/2619-Colby-St-75204/unit-B/home/32251730'
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(url, headers=headers, timeout=10)
    if response.status_code != 200:
        return ''

    soup = BeautifulSoup(response.text.strip(), 'lxml')
    price_nodes = soup.select('.value.font-size-large')
    if price_nodes:
        return price_nodes[0].text.strip()
    return ''
def extract():
    """Yield the price text scraped from each source, in a fixed order.

    Calls ``scrape_zillow()`` and then ``scrape_redfin()``. Each scraper
    returns ``''`` when it could not find a price; those empty results are
    skipped so that downstream steps only receive text that may contain a
    price.

    Yields:
        Non-empty price strings as returned by the scrapers.
    """
    for scraper in (scrape_zillow, scrape_redfin):
        price = scraper()
        # An empty string means the scrape failed; passing it on would only
        # make the numeric conversion step fail on float('').
        if price:
            yield price
def transform(price: str) -> float:
    """Convert scraped price text such as ``'$1,200'`` into a float.

    Thousands separators and a leading ``$`` are removed first. If the
    remaining text is not a plain number (for example ``'1500/mo'`` or
    ``'950+'``), the first run of digits, with an optional decimal part,
    is used instead.

    Args:
        price: Price text as scraped from a listing page.

    Returns:
        The numeric value of the price.

    Raises:
        ValueError: if *price* contains no number at all.
    """
    import re  # local import: this block must not rely on file-level imports

    cleaned = price.replace(',', '').strip().lstrip('$')
    try:
        # Fast path: identical to the plain conversion for clean input.
        return float(cleaned)
    except ValueError:
        # Rental prices carry suffixes like "/mo" or "+"; pull out the number.
        found = re.search(r'\d+(?:\.\d+)?', cleaned)
    if found is None:
        raise ValueError(f'no numeric price found in {price!r}')
    return float(found.group())
def load(price: float):
    """Append *price* as a single line of text to ``pricing.txt``.

    The file is opened in append mode, so existing lines are kept and the
    file is created if it does not exist yet.
    """
    line = str(price) + '\n'
    with open('pricing.txt', 'a+', encoding='utf8') as out_file:
        out_file.write(line)
# Request headers shared by the scraper functions. Defined at module level
# (not inside the __main__ guard) so the scrapers can also be called when
# this module is imported instead of run as a script.
headers = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
    # The HTTP request header is spelled "Referer"; "referrer" is not a
    # standard header name.
    'referer': 'https://google.com',
}


if __name__ == '__main__':
    # Wire the three steps into a linear pipeline: extract -> transform -> load.
    graph = bonobo.Graph(
        extract,
        transform,
        load,
    )
    bonobo.run(graph)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement