Advertisement
enkryptor

Untitled

Jan 18th, 2022
822
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import requests
  2. from bs4 import BeautifulSoup
  3. import csv
  4. import os
  5.  
# Default listing URL. Not read by the functions visible here: parse() asks the
# user for a URL and binds it to a local name with the same spelling.
URL = 'https://auto.ria.com/uk/newauto/marka-mitsubishi/'
# Request headers sent by get_html(): a desktop-browser user-agent string and a
# wildcard accept header.
HEADERS = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36 OPR/82.0.4227.50', 'accept': '*/*'}
# Site origin that get_content() prepends to each listing's href.
HOST = 'https://auto.ria.com'
# Path of the CSV file that parse() writes and then opens.
FILE = 'cars.csv'
  10.  
  11. def get_html(url, params=None):
  12.   r = requests.get(url, headers=HEADERS, params=params)
  13.   return r
  14.  
  15. def get_pages_count(html):
  16.   soup = BeautifulSoup(html, 'html.parser')
  17.   pagination = soup.find_all('span', class_='mhide')
  18.   if pagination:
  19.     return int(pagination[-1].get_text())
  20.   else:
  21.     return 1
  22.  
  23. def get_content(html):
  24.   soup = BeautifulSoup(html, 'html.parser')
  25.   items = soup.find_all('section', class_='proposition')
  26.   cars = []
  27.   for item in items:
  28.     uah = item.find('span', class_='size16')
  29.     if uah:
  30.       uah = uah.get_text()
  31.     else:
  32.       uah = 'Цену уточнить'
  33.     cars.append({
  34.       'title': item.find('div', class_='proposition_title').get_text(strip=True),
  35.       'link': HOST + item.find('a', class_='proposition_link').get('href'),
  36.       'dollars': item.find('span', class_='green').get_text(strip=True),
  37.       'uah': uah,
  38.       'city': item.find('span', class_='item region').get('title')
  39.  
  40.     })
  41.   return cars
  42.  
  43. def save_file(items, path):
  44.   with open(path, 'w', newline='') as file:
  45.     writer = csv.writer(file, delimiter=';')
  46.     writer.writerow(['Марка', 'Ссылка', 'Доллары', 'Гривны', 'Город'])
  47.     for item in items:
  48.       writer.writerow([item['title'], item['link'], item['dollars'], item['uah'], item['city']])
  49.  
  50. def parse():
  51.   URL = input('Enter URL: ')
  52.   URL = URL.strip()
  53.   html = get_html(URL)
  54.   if html.status_code == 200:
  55.     cars = []
  56.     pages_count = get_pages_count(html.text)
  57.     for page in range(1, pages_count + 1):
  58.       print(f'Парсинг страницы {page} из {pages_count}...')
  59.       html = get_html(URL, params={'page': page})
  60.       cars.extend(get_content(html.text))
  61.     save_file(cars, FILE)
  62.     print(f'Received {len(cars)} cars')
  63.     os.startfile(FILE)
  64.   else:
  65.     print('Error')
  66.  
  67.  
# Script entry point. There is no ``if __name__ == '__main__'`` guard, so the
# scraper (including its input() prompt) also runs when this file is imported.
parse()
Advertisement
Advertisement
Advertisement
RAW Paste Data Copied
Advertisement