Advertisement
Guest User

Untitled

a guest
Apr 6th, 2020
310
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.61 KB | None | 0 0
  1. import sys
  2. import requests
  3. from bs4 import BeautifulSoup
  4. import csv
  5.  
  6.  
  7. # create csv file in folder
  8. f = csv.writer(open('user-offer.csv', 'w'))
  9. f.writerow(['Title', 'Location', 'number', 'description', 'pictures'])
  10.  
  11. url = 'https://www.olx.bg/ads/user/lpAO/'
  12. page = requests.get(url)
  13. # print(page.text)
  14.  
  15. if not page.status_code == 200:
  16.     exit()
  17.  
  18. html_soup = BeautifulSoup(page.content, "html.parser")
  19. # print(html_soup.prettify())
  20.  
  21. all_user_info = html_soup.find('div', id='listContainer')
  22. # print(all_user_info)
  23. user_name = html_soup.find('h3', class_='xxx-large')
  24. # print(user_name)
  25. if None in user_name:
  26.     print('You`r not login')
  27.     sys.exit()
  28.  
  29. all_user_offers = html_soup.find_all(class_='listHandler')
  30. # print(len(all_user_offers))    # колко обяви има
  31. if None in all_user_offers:
  32.     print('No offers')
  33.  
  34.  
  35. for offer in all_user_offers:
  36.     offer_title = offer.find_all('a', class_='marginright5 link linkWithHash detailsLink')
  37.     offer_price = offer.find_all('p', class_='price')
  38.     offer_img = offer.find_all('img', class_='fleft')
  39.     if None in (offer_title, offer_price, offer_img):
  40.         continue
  41.  
  42.     # print(offer_title)
  43.     # print(offer_price)
  44.     # print(offer_img)
  45.  
  46. offer_link_cont = html_soup.find_all('a', attrs={'class': 'marginright5'})
  47. # print(offer_link_cont)
  48.  
  49. urls = []
  50. for a_tag in offer_link_cont:
  51.     url = a_tag['href']
  52.     urls.append(url)
  53. # print(urls)
  54.  
  55. for user_url_offer in urls:
  56.     url_offer = user_url_offer
  57.  
  58.  
  59.     def user_offer_one():
  60.         url = url_offer
  61.         page = requests.get(url)
  62.         # print(page.text)
  63.  
  64.         if not page.status_code == 200:
  65.             exit()
  66.  
  67.         html_soup = BeautifulSoup(page.content, "html.parser")
  68.         # print(html_soup.prettify())
  69.  
  70.         offer_results = html_soup.find('div', id='offerdescription')
  71.         # print(all_offer.prettify())
  72.  
  73.         offer_title = offer_results.find('div', class_='offer-titlebox').h1
  74.         offer_location = offer_results.find('a', class_='show-map-link').strong
  75.         offer_number = offer_results.find('em').small
  76.         offer_description = offer_results.find('div', id='textContent')
  77.  
  78.         # print(offer_title.text)
  79.         # print(offer_location.text)
  80.         # print(offer_number.text)
  81.         # print(offer_description.text)
  82.  
  83.         for offer_img in offer_results:
  84.             offer_imgs = offer_description('div', class_='photo-glow')
  85.             # print(offer_img)
  86.  
  87.         f.writerow([offer_title, offer_location, offer_number, offer_description, offer_imgs])
  88.     user_offer_one()
  89.  
  90. # print(url_offer)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement