Advertisement
eg0rmaffin

avitoParser

Dec 11th, 2020
638
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.79 KB | None | 0 0
  1. import requests
  2. from bs4 import BeautifulSoup
  3. import csv
  4.  
  5. def add_csv(data):
  6.     with open('gPixel.csv', 'a', encoding='utf-8') as file:
  7.         writer = csv.writer(file)
  8.         writer.writerow((
  9.             data['title'],
  10.             data['url'],
  11.             data['price']
  12.         ))
  13.  
  14. def get_html(url):
  15.     r = requests.get(url)
  16.     return r.text
  17.  
  18. def get_last(html):
  19.     soup = BeautifulSoup(html, 'lxml')
  20.  
  21.     pages = soup.find('div', class_='pagination-pages clearfix').find_all('a')[-1].get('href')
  22.     total_pages = int(pages.split('=')[1].split('&')[0])
  23.     return(total_pages)
  24.  
  25. def get_data(html):
  26.     soup = BeautifulSoup(html, 'lxml')
  27.  
  28.     blocks = soup.find_all('div', class_='iva-item-body-NPl6W')
  29.     print(type(blocks))
  30.     for block in blocks:
  31.         try:
  32.             name = block.find('div', class_='iva-item-titleStep-2bjuh').find('span').text
  33.             print(name)
  34.         except:
  35.             name: ''
  36.         try:
  37.             url = 'https://www.avito.ru' + block.find('div', class_='iva-item-titleStep-2bjuh').find('a').get('href')
  38.             print(url)
  39.         except:
  40.             url: ''
  41.         try:
  42.             price = block.find('div', class_='iva-item-priceStep-2qRpg').find('span', class_ = 'price-text-1HrJ_ text-text-1PdBw text-size-s-1PUdo').text
  43.             print(price)
  44.         except:
  45.             price: ''
  46.         dict = {
  47.             'title': name,
  48.             'url' : url,
  49.             'price': price,
  50.         }
  51.         add_csv(dict)
  52.  
  53.  
  54.  
  55.  
  56.  
  57.  
  58. url = 'https://www.avito.ru/moskva/telefony?q=google+pixel'
  59. base_url = 'https://www.avito.ru/moskva/telefony?p='
  60. end_url = '&q=google+pixel'
  61. a = get_html(url)
  62.  
  63. for i in range(1, get_last(a) + 1):
  64.     gen_url = base_url + str(i) + end_url
  65.     html = get_html(gen_url)
  66.     get_data(html)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement