Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import requests
- import bs4
- import io
def clean_text(s):
    """Return *s* compacted into a single token.

    Leading and trailing whitespace is stripped, then every remaining
    space, line feed and carriage return is deleted, so a scraped value
    such as " 19 500 PLN\n" becomes "19500PLN".

    Interior whitespace other than those three characters (for example a
    tab) is left in place.
    """
    # strip() runs first because it also removes leading/trailing tabs and
    # other whitespace that the deletion table below does not cover.
    return s.strip().translate(str.maketrans("", "", "\n\r "))
# otomoto.pl search query: Audi A4 listings under the "od-2004" path with the
# damaged=0 and no_accident=1 filters set. Adjacent string literals are used
# instead of a backslash continuation so that source indentation can never
# leak into the URL.
search_url = (
    'https://www.otomoto.pl/osobowe/audi/a4/od-2004/?search%5Bfilter_enum_damaged%5D=0&search%5Bfilter_enum_no_accident%5D=1&'
    'search%5Bbrand_program_id%5D%5B0%5D=&search%5Bcountry%5D='
)

# Fetch the first results page only to discover how many pages exist.
result = requests.get(search_url, timeout=30)
result.raise_for_status()
cars_html_data = bs4.BeautifulSoup(result.text, features="lxml")

# The last ".page" element carries the highest page number. When the search
# has no pagination links at all there is exactly one page of results.
page_links = cars_html_data.select('.page')
pages_count = int(page_links[-1].text) if page_links else 1
print("total search pages = {}".format(pages_count))

# The context manager guarantees the file is closed even if a request or a
# parse step fails part-way through the scrape.
with io.open('cars-search-data.txt', 'w', encoding="utf-8") as cars_file:
    cars_file.write("cena;nazwa;rocznik;przebieg;silnik;paliwo;\n")

    # Pages are numbered from 1 and the last page is included.
    for index in range(1, pages_count + 1):
        result = requests.get("{}&page={}".format(search_url, index), timeout=30)
        result.raise_for_status()
        current_page = bs4.BeautifulSoup(result.text, features='lxml')
        cars_search_page = current_page.select('article.offer-item')

        for car_item in cars_search_page:
            price_tag = car_item.find('span', class_='offer-price__number')
            title_tag = car_item.find('a', class_='offer-title__link')
            if price_tag is None or title_tag is None:
                # Tile without a price or a title: skip it rather than
                # crash or emit a half-written row.
                continue

            # Build the whole row first so it is written in one piece.
            fields = [clean_text(price_tag.text.strip()), title_tag.text.strip()]
            params = car_item.find_all("li", class_='offer-item__params-item')
            fields.extend(clean_text(param.text) for param in params)

            # Every field, including the last, is followed by ';' to match
            # the header line written above.
            cars_file.write("".join("{};".format(field) for field in fields))
            cars_file.write('\n')
- """
- example data.....
- 19500PLN;Audi A4 B7;2005;276000km;2500cm3;Diesel;
- 33900PLN;Audi A4 B8;2011;200000km;1800cm3;Benzyna;
- 44700PLN;Audi A4 B8;2013;202650km;2000cm3;Diesel;
- 19500PLN;Audi A4 B7;2007;281800km;1896cm3;Diesel;
- 193900PLN;Audi A4 B9;2018;8133km;1967cm3;Diesel;
- 164853PLN;Audi A4 B9;2019;5km;1984cm3;Benzyna;
- 119500PLN;Audi A4 B9;2017;44121km;1395cm3;Benzyna;
- 199400PLN;Audi A4 B9;2019;1km;2000cm3;Benzyna;
- 191352PLN;Audi A4 B9;2019;5264km;1984cm3;Benzyna;
- 239900PLN;Audi A4 B9;2019;5km;1968cm3;Diesel;
- """
Advertisement
Add Comment
Please sign in to add a comment.
Advertisement