Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import pandas
import requests
from bs4 import BeautifulSoup
def save_in_excel(data, filename="parsed_list.xlsx"):
    """Write scraped listings to an Excel workbook.

    Args:
        data: Iterable of mappings, each providing the keys ``"title"``,
            ``"price"`` and ``"link"``.
        filename: Destination path of the workbook. Defaults to
            ``"parsed_list.xlsx"`` in the current working directory.

    Raises:
        KeyError: If an entry lacks one of the required keys.
    """
    # Materialize once so one-shot iterables (generators) can be read
    # for each of the three columns.
    rows = list(data)
    df = pandas.DataFrame(
        {
            "Title": [row["title"] for row in rows],
            "Price": [row["price"] for row in rows],
            "Link": [row["link"] for row in rows],
        }
    )
    df.to_excel(filename)
def parse():
    """Scrape one OLX listing page and save the offers to Excel.

    Downloads the computers-and-components category page, extracts the
    title, link and price of every offer block, and passes the collected
    records to ``save_in_excel``. Offer blocks missing the title anchor or
    the price paragraph are skipped.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    URL = "https://www.olx.kz/elektronika/kompyutery-i-komplektuyuschie/"
    HEADERS = {"User-Agent": "YOUR USER AGENT"}
    TITLE_LINK_CLASS = "marginright5 link linkWithHash detailsLink"

    # Without a timeout requests may block forever on a stalled connection.
    response = requests.get(URL, headers=HEADERS, timeout=30)
    # Fail loudly instead of parsing an error page into an empty sheet.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, "html.parser")
    items = soup.find_all("div", class_="offer-wrapper")

    comps = []
    for item in items:
        # The same anchor carries both the title text and the link.
        anchor = item.find("a", class_=TITLE_LINK_CLASS)
        price_tag = item.find("p", class_="price")
        if anchor is None or price_tag is None:
            # Incomplete offer block: skip it.
            continue
        comps.append(
            {
                "title": anchor.text.strip(),  # tag contents
                "link": anchor.get("href"),  # link taken from the tag
                "price": price_tag.text.strip(),
            }
        )

    save_in_excel(comps)
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    parse()
Advertisement
Add Comment
Please sign in to add a comment.
Advertisement