Advertisement
viking_unet

Untitled

Jun 10th, 2020
1,323
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.36 KB | None | 0 0
  1. import re
  2. import requests
  3. from bs4 import BeautifulSoup
  4.  
  5. URL = 'https://auto.ria.com/newauto/marka-jeep/'
  6. HEADERS = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:71.0) Gecko/20100101 Firefox/71.0', 'accept': '*/*'}
  7. HOST = 'https://auto.ria.com'
  8.  
  9.  
  10. def get_html(url, params=None):
  11.     r = requests.get(url, headers=HEADERS, params=params)
  12.     return r
  13.  
  14.  
  15. def get_content(html):
  16.     soup = BeautifulSoup(html, 'html.parser')
  17.     items = soup.find_all('div', class_='proposition')
  18.     #for item in items: print(item)
  19.  
  20.     cars = []
  21.     for item in items:
  22.         usd_price, uah_price = map(str.strip, item.find('div', class_='proposition_price').get_text().split('•'))
  23.         #print(usd_price)
  24.         cars.append({
  25.             'title': item.find('div', class_='proposition_title').find('strong').get_text(strip=True),
  26.             'link': HOST + item.find('div', class_='proposition_title').find('a').get('href'),
  27.             'usd_price': usd_price,
  28.             'uah_price' : uah_price,
  29.             'city': item.find('div', class_=re.compile('^proposition_region')).find('strong').get_text(),
  30.         })
  31.     return cars
  32.  
  33.  
  34. def parse():
  35.     html = get_html(URL)
  36.     if html.status_code == 200:
  37.         #print(html.text)
  38.         cars = get_content(html.text)
  39.         for car in cars: print(car)
  40.     else:
  41.         print('Error')
  42.  
  43.  
# Script entry point. The call sits at module level with no
# `if __name__ == "__main__":` guard, so importing this file also runs it.
parse()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement