Advertisement
Fsoky

Parser OLX Python

Mar 1st, 2022
1,787
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.40 KB | None | 0 0
  1. from bs4 import BeautifulSoup
  2. import requests
  3.  
  4. import pandas
  5.  
  6.  
  7. def save_in_excel(data):
  8.     titles = []
  9.     prices = []
  10.     links = []
  11.  
  12.     for d in data:
  13.         titles.append(d["title"])
  14.         prices.append(d["price"])
  15.         links.append(d["link"])
  16.  
  17.     df = pandas.DataFrame(
  18.         {
  19.             "Title": titles,
  20.             "Price": prices,
  21.             "Link": links
  22.         }
  23.     )
  24.     df.to_excel("parsed_list.xlsx")
  25.  
  26.  
  27. def parse():
  28.     URL = "https://www.olx.kz/elektronika/kompyutery-i-komplektuyuschie/"
  29.     HEADERS = {"User-Agent": "YOUR USER AGENT"}
  30.  
  31.     response = requests.get(URL, headers=HEADERS)
  32.     soup = BeautifulSoup(response.content, "html.parser")
  33.  
  34.     items = soup.find_all("div", class_="offer-wrapper")
  35.     comps = []
  36.  
  37.     for item in items:
  38.         try:
  39.             title = item.find("a", class_="marginright5 link linkWithHash detailsLink").text.strip() # Получаем содержимое
  40.             link = item.find("a", class_="marginright5 link linkWithHash detailsLink").get("href") # Получаем ссылку из тега
  41.             price = item.find("p", class_="price").text.strip()
  42.         except AttributeError:
  43.             continue
  44.  
  45.         comps.append(
  46.             {
  47.                 "title": title,
  48.                 "link": link,
  49.                 "price": price
  50.             }
  51.         )
  52.    
  53.     save_in_excel(comps)
  54.  
  55.  
  56. parse()  # Module-level call: the scrape runs whenever this file is executed or imported.
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement