Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# -*- coding: UTF-8 -*-
"""Scrape the calorie tables of kalóriaguru.hu into an Excel workbook.

The script opens the calorie-table index page in headless Chrome, collects
the category links found in the page's ``left`` and ``right`` column divs,
visits every category, then visits every food listed in the category table
to read additional values from its detail page. One spreadsheet row is
written per food, with the food name hyperlinked to its detail page.
"""
from datetime import datetime

import xlsxwriter
from bs4 import BeautifulSoup, Tag
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

BASE_URL = 'https://www.xn--kalriaguru-ibb.hu'
INDEX_URL = BASE_URL + '/kaloriatablazat/kaloriatablazat.php'
# CSS classes of the two divs on the index page that hold the category links.
COLUMN_CLASSES = ['left', 'right']
OUTPUT_FILE = 'kalóriatáblázat.xlsx'
# Header row of the worksheet; the order matches the rows built in
# _scrape_material (minus the leading URL, which becomes the hyperlink target).
LETTERHEAD = ['Megnevezés', 'Energia', 'Fehérje', 'Zsír', 'Telített', 'Egyszeresen telítettlen',
              'Többszörösen telítettlen', 'Szénhidrát', 'GI', 'Rost', 'Cukor']


def _page_soup(driver):
    """Parse the page currently loaded in *driver* with html.parser."""
    return BeautifulSoup(driver.page_source, "html.parser")


def _collect_category_links(driver):
    """Return ``[title, absolute_url]`` pairs for every category on the index page."""
    driver.get(INDEX_URL)
    # The find_element_by_* helpers were removed in Selenium 4;
    # find_element(By.XPATH, ...) works on both Selenium 3 and 4.
    driver.find_element(By.XPATH, '//*[@id="cookieBtn"]').click()
    soup = _page_soup(driver)

    links = []
    for column_class in COLUMN_CLASSES:
        for title in soup("div", {"class": column_class})[0].contents:
            # Only elements with exactly two children are category links.
            # Text nodes are skipped explicitly: their len() is a character
            # count, so a two-character text node would otherwise pass the
            # length check and then fail on the attribute lookup.
            if isinstance(title, Tag) and len(title) == 2:
                links.append([title.text, BASE_URL + title.attrs['href']])
    return links


def _detail_value(calorie_datas, row_index):
    """Return the value in row *row_index* of the detail table.

    The cell text is cut at the first non-breaking space and only the part
    before it is kept (presumably this drops the unit suffix).
    """
    return calorie_datas.contents[row_index].contents[1].text.split('\xa0')[0]


def _scrape_material(driver, table_row):
    """Build one output row for the food described by *table_row*.

    The first six cells of the category table row are combined with five
    values read from the food's own detail page.

    Returns:
        ``[detail_url, name, energy, protein, fat, saturated,
        monounsaturated, polyunsaturated, carbohydrate, GI, fiber, sugar]``
    """
    cells = table_row.contents
    detail_url = BASE_URL + cells[0].contents[0].attrs['href']
    name, energy, protein, fat, carbohydrate, gi = (cells[i].text for i in range(6))

    driver.get(detail_url)
    calorie_datas = _page_soup(driver)("tbody", {"id": "calorieDatas"})[0]
    fiber = _detail_value(calorie_datas, 17)
    sugar = _detail_value(calorie_datas, 15)
    saturated = _detail_value(calorie_datas, 7)
    monounsaturated = _detail_value(calorie_datas, 9)
    polyunsaturated = _detail_value(calorie_datas, 11)

    return [detail_url, name, energy, protein, fat, saturated, monounsaturated,
            polyunsaturated, carbohydrate, gi, fiber, sugar]


def _collect_materials(driver, links):
    """Visit every category in *links* and return one row per listed food."""
    materials = []
    for title, category_url in links:  # for testing: links[1:2]
        print("Ezeket listázom jelenleg: ", title)
        driver.get(category_url)
        soup = _page_soup(driver)
        calorie_table = soup("div", {"id": "calorieTable"})[0].contents[0].contents[1].contents
        # The table rows are already parsed, so navigating the driver to the
        # detail pages inside the loop does not disturb the iteration.
        for table_row in calorie_table:
            materials.append(_scrape_material(driver, table_row))
    return materials


def _write_workbook(materials):
    """Write the header and one row per food to OUTPUT_FILE."""
    # The context manager closes (and thereby saves) the workbook even if a
    # write call raises.
    with xlsxwriter.Workbook(OUTPUT_FILE) as workbook:
        worksheet = workbook.add_worksheet()
        worksheet.write_row(0, 0, LETTERHEAD)
        for row, material in enumerate(materials, start=1):
            # Column 0 shows the food name as a hyperlink to its detail page.
            worksheet.write_url(row, 0, material[0], string=material[1])
            worksheet.write_row(row, 1, material[2:])


def main():
    """Run the full scrape and report the elapsed time."""
    start = datetime.now()

    chrome_options = Options()
    chrome_options.add_argument("--headless")
    driver = webdriver.Chrome(options=chrome_options)
    try:
        links = _collect_category_links(driver)
        materials = _collect_materials(driver, links)
    finally:
        # Always shut the browser down, otherwise a failed scrape leaves a
        # headless Chrome process running.
        driver.quit()

    _write_workbook(materials)

    runtime = datetime.now() - start
    # timedelta.seconds is only the seconds component and discards whole
    # days; total_seconds() is the real elapsed time.
    print('Runtime: ', int(runtime.total_seconds()), 'seconds')


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement