Advertisement
Guest User

make.py

a guest
Feb 23rd, 2020
140
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.80 KB | None | 0 0
  1. # -*- coding: UTF-8 -*-
  2. from selenium import webdriver
  3. from selenium.webdriver.chrome.options import Options
  4. from bs4 import BeautifulSoup
  5. import xlsxwriter
  6. from datetime import datetime
  7.  
  8.  
  9. start = datetime.now()
  10. url = 'https://www.xn--kalriaguru-ibb.hu/kaloriatablazat/kaloriatablazat.php'
  11. columns_name = ['left', 'right']
  12.  
  13. chrome_options = Options()
  14. chrome_options.add_argument("--headless")
  15. driver = webdriver.Chrome(options=chrome_options)
  16. driver.get(url)
  17. driver.find_element_by_xpath('//*[@id="cookieBtn"]').click()
  18. page_soup = BeautifulSoup(driver.page_source, "html.parser")
  19.  
  20. links = list()
  21. for col in columns_name:
  22.     titles = page_soup("div", {"class": col})[0].contents
  23.     for title in titles:
  24.         if len(title) == 2:
  25.             data = list()
  26.             data.append(title.text)
  27.             data.append("https://www.xn--kalriaguru-ibb.hu"+title.attrs['href'])
  28.             links.append(data)
  29.  
  30. materials = list()
  31. for link in links:          # teszthez: links[1:2]
  32.     print("Ezeket listázom jelenleg: ", link[0])
  33.     driver.get(link[1])
  34.     page_soup = BeautifulSoup(driver.page_source, "html.parser")
  35.     calorieTable = page_soup("div", {"id": "calorieTable"})[0].contents[0].contents[1].contents
  36.     for content in calorieTable:
  37.         data = list()
  38.         data.append('https://www.xn--kalriaguru-ibb.hu' + content.contents[0].contents[0].attrs['href'])
  39.         for i in range(0, 6):
  40.             data.append(content.contents[i].text)
  41.  
  42.         driver.get(data[0])
  43.         page_soup = BeautifulSoup(driver.page_source, "html.parser")
  44.         calorieDatas = page_soup("tbody", {"id": "calorieDatas"})
  45.         data.append(calorieDatas[0].contents[17].contents[1].text.split('\xa0')[0])         # rost
  46.         data.append(calorieDatas[0].contents[15].contents[1].text.split('\xa0')[0])         # cukor
  47.         data.insert(5, calorieDatas[0].contents[7].contents[1].text.split('\xa0')[0])       # telített
  48.         data.insert(6, calorieDatas[0].contents[9].contents[1].text.split('\xa0')[0])       # egyszeresen telítettlen
  49.         data.insert(7, calorieDatas[0].contents[11].contents[1].text.split('\xa0')[0])      # többszörösen telítettlen
  50.  
  51.         materials.append(data)
  52.  
  53.  
  54. driver.quit()
  55.  
  56. workbook = xlsxwriter.Workbook('kalóriatáblázat.xlsx')
  57. worksheet = workbook.add_worksheet()
  58. letterhead = ['Megnevezés', 'Energia', 'Fehérje', 'Zsír', 'Telített', 'Egyszeresen telítettlen',
  59.               'Többszörösen telítettlen', 'Szénhidrát', 'GI', 'Rost', 'Cukor']
  60. worksheet.write_row(0, 0, letterhead)
  61.  
  62. row = 1
  63. for material in materials:
  64.     worksheet.write_url(row, 0, material[0], string=material[1])
  65.     worksheet.write_row(row, 1, material[2:])
  66.     row += 1
  67.  
  68. workbook.close()
  69.  
  70. end = datetime.now()
  71. runtime = end - start
  72. print('Runtime: ', runtime.seconds, 'seconds')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement