SHARE
TWEET

Untitled

a guest May 22nd, 2019 62 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1.  
  2. import requests, bs4, sys, csv, datetime, logging, pandas
  3.  
  4. try:
  5.     now = datetime.datetime.now()
  6.     date = now.date()
  7.     time = now.time()
  8.     #the scraper axcepts two command line arguments - maker and model
  9.     #TO DO: validate input, apparently argparse is helpful
  10.     maker = sys.argv[1]
  11.     model = sys.argv[2]
  12.  
  13.     path = 'https://www.otomoto.pl/osobowe/' + maker + '/' + model
  14.     fileName = maker + '-' + model + '-' + str(now.date()) + '.csv'
  15.  
  16.     res = requests.get(path)
  17.     res.raise_for_status()
  18.  
  19.     #check how many pages are there
  20.     carSoup = bs4.BeautifulSoup(res.text, features="lxml")
  21.     lastPage = int(carSoup.select('.page')[-1].text)
  22.  
  23.     carsList = []
  24.     #iterate through pages
  25.     for i in range(1, lastPage):
  26.         res = requests.get(path + '?page=' + str(i))
  27.         res.raise_for_status()
  28.         currentPage = bs4.BeautifulSoup(res.text, features='lxml')
  29.         carList = currentPage.select('article.offer-item')
  30.         print("parsing page " + str(i))
  31.         for car in carList:
  32.             #get the interesting data and write to file
  33.             my_dict = {}
  34.  
  35.             price = car.find('span',class_='offer-price__number').text.strip().replace(" ", "")
  36.             my_dict["price"] = price
  37.             title = car.find('a',class_='offer-title__link').text.strip()
  38.             my_dict["title"] = title
  39.  
  40.             #Iterate through parameters
  41.             paramList = ["year", "mileage", "engine_capacity", "fuel_type"]
  42.             for param in paramList:
  43.                 currentParameter = car.find('li', {"data-code": param})
  44.                 if (currentParameter):
  45.                     my_dict[param] = currentParameter.text.strip()
  46.                 else:
  47.                     my_dict[param] = "-"
  48.            
  49.             carsList.append(my_dict)
  50.             # outputWriter.writerow(my_dict)
  51.    
  52.  
  53.     #prepare the file
  54.     fieldnames = carsList[0].keys()
  55.     with open(fileName, 'a', encoding="utf-8") as output_file:
  56.         outputWriter = csv.DictWriter(output_file, fieldnames=fieldnames, delimiter=';', lineterminator='\n', extrasaction='ignore')
  57.         outputWriter.writeheader()
  58.         outputWriter.writerows(carsList)
  59.  
  60.     # pandas.DataFrame(carsList).to_csv(fileName)
  61.     input('Finished, enter char to close window.')
  62. except Exception as e:
  63.     logging.error(f"{e}")
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top