Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import requests
- from bs4 import BeautifulSoup
- import csv
def add_csv(data):
    """Append one listing to ``gPixel.csv`` as a single CSV row.

    Args:
        data: Mapping with the keys ``'title'``, ``'url'`` and ``'price'``.
            The values are written as one row in that column order.

    Raises:
        KeyError: If any of the three keys is missing from ``data``.
    """
    # newline='' hands line-ending control to the csv module. Without it,
    # platforms that translate newlines on write end up with an empty line
    # after every row.
    with open('gPixel.csv', 'a', encoding='utf-8', newline='') as file:
        writer = csv.writer(file)
        writer.writerow((data['title'], data['url'], data['price']))
def get_html(url, timeout=30):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Requests
            applies no timeout unless one is passed, so without this a
            stalled connection would block the script forever.

    Returns:
        The response body decoded to ``str`` (``Response.text``). The HTTP
        status code is not checked, so the body of an error page is returned
        as-is.

    Raises:
        requests.exceptions.RequestException: On connection failures or when
            the timeout expires.
    """
    response = requests.get(url, timeout=timeout)
    return response.text
def get_last(html):
    """Return the highest page number linked from a search-results page.

    The number is read from the ``p`` query parameter of the last ``<a>``
    inside the ``div`` whose class is ``pagination-pages clearfix``.

    Args:
        html: Markup of a search-results page.

    Returns:
        The page number as an ``int``. Returns 1 when the pagination block,
        its links, or the ``p`` parameter cannot be found, so that callers
        still process the first page instead of crashing.

    Raises:
        ValueError: If the ``p`` parameter is present but not an integer.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import parse_qs, urlsplit

    soup = BeautifulSoup(html, 'lxml')
    pagination = soup.find('div', class_='pagination-pages clearfix')
    if pagination is None:
        return 1

    links = pagination.find_all('a')
    if not links:
        return 1

    href = links[-1].get('href') or ''
    # Parse the query string properly instead of splitting on '=' and '&',
    # which only works while ``p`` happens to be the first parameter.
    page_values = parse_qs(urlsplit(href).query).get('p')
    if not page_values:
        return 1
    return int(page_values[0])
def get_data(html):
    """Extract every listing on a results page and append each to the CSV.

    For each ``div`` with class ``iva-item-body-NPl6W`` the title, absolute
    URL and price text are looked up, echoed to stdout when found, and
    passed to ``add_csv``. A field that cannot be located is stored as an
    empty string.

    Args:
        html: Markup of a search-results page.
    """
    soup = BeautifulSoup(html, 'lxml')
    blocks = soup.find_all('div', class_='iva-item-body-NPl6W')

    for block in blocks:
        title_step = block.find('div', class_='iva-item-titleStep-2bjuh')

        # ``find`` returns None when nothing matches, so a missing element
        # surfaces as AttributeError on the chained call. The fallbacks must
        # be real assignments: a bare ``name: ''`` annotation binds nothing
        # and would leak the previous listing's value (or raise NameError).
        try:
            name = title_step.find('span').text
            print(name)
        except AttributeError:
            name = ''

        try:
            url = 'https://www.avito.ru' + title_step.find('a').get('href')
            print(url)
        except (AttributeError, TypeError):
            # TypeError: the <a> exists but has no href, so get() gave None.
            url = ''

        try:
            price = block.find(
                'div', class_='iva-item-priceStep-2qRpg'
            ).find(
                'span',
                class_='price-text-1HrJ_ text-text-1PdBw text-size-s-1PUdo',
            ).text
            print(price)
        except AttributeError:
            price = ''

        row = {
            'title': name,
            'url': url,
            'price': price,
        }
        add_csv(row)
# Search landing page; fetched once to learn how many result pages exist.
url = 'https://www.avito.ru/moskva/telefony?q=google+pixel'
# A page address is assembled as base_url + <page number> + end_url.
base_url = 'https://www.avito.ru/moskva/telefony?p='
end_url = '&q=google+pixel'

first_page_html = get_html(url)
last_page = get_last(first_page_html)

# Walk every result page, from 1 to last_page inclusive, and store its listings.
for page_number in range(1, last_page + 1):
    page_url = f'{base_url}{page_number}{end_url}'
    get_data(get_html(page_url))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement