Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import re
- import requests
- import mysql.connector
- from mysql.connector import MySQLConnection, Error
- from bs4 import BeautifulSoup
class DataBase:
    """Thin wrapper around a MySQL connection used to store scraped products.

    The credentials and schema name are kept on the instance and a fresh
    connection is opened by ``get_conn`` every time one is needed.
    """

    def __init__(self, user, password, db):
        """Store the connection settings and probe the server once.

        Args:
            user: MySQL user name.
            password: Password for ``user``.
            db: Name of the schema to connect to.

        A connection error is printed rather than raised, so constructing
        the object never fails because of ``mysql.connector.Error``.
        """
        self.user = user
        self.password = password
        self.db = db
        try:
            # The connection is only a reachability probe; close it right
            # away so it is not leaked.
            self.get_conn().close()
        except Error as e:
            print(e)

    def get_conn(self):
        """Open and return a new connection; the caller must close it."""
        conn = mysql.connector.connect(user=self.user,
                                       password=self.password,
                                       db=self.db)
        return conn

    def add_products(self, title, price, url):
        """Insert one product row into ``tesco.fresh`` and ``tesco.bakery``.

        Args:
            title: Product title.
            price: Product price.
            url: Product page URL.

        Both inserts are committed together. Any ``mysql.connector.Error``
        is printed and swallowed; the cursor and the connection are closed
        in every case.
        """
        # NOTE(review): the same row is written to BOTH tables regardless of
        # which category it was scraped from - confirm this is intended.
        sql_fresh = 'INSERT INTO tesco.fresh (title, price, url) VALUES (%s, %s, %s)'
        sql_bakery = 'INSERT INTO tesco.bakery (title, price, url) VALUES (%s, %s, %s)'
        args = (title, price, url)
        try:
            conn = self.get_conn()
            try:
                c = conn.cursor()
                try:
                    c.execute(sql_fresh, args)
                    c.execute(sql_bakery, args)
                    conn.commit()
                finally:
                    c.close()
            finally:
                # Closing without a commit discards any partial insert.
                conn.close()
        except Error as e:
            print(e)
def write_database(data):
    """Persist one scraped product through a freshly created ``DataBase``.

    Args:
        data: Mapping with the keys ``'title'``, ``'price'`` and ``'url'``.
    """
    # A new DataBase (with hard-coded credentials) is built on every call.
    storage = DataBase(user='root', password='123456', db='tesco')
    row = [data[key] for key in ('title', 'price', 'url')]
    storage.add_products(*row)
class Tesco:
    """Scraper for Tesco grocery listing pages."""

    def get_html(self, url, timeout=30):
        """Download ``url`` and return the response body as text.

        Args:
            url: Page address to fetch.
            timeout: Seconds to wait for the server before giving up;
                without it a stalled connection would block forever.
        """
        r = requests.get(url, timeout=timeout)
        return r.text

    def get_total_pages(self, html):
        """Return the page count shown in the listing's pagination bar.

        The number is read from the ``span`` inside the second-to-last
        ``pagination--button`` link of the
        ``pagination--page-selector-wrapper`` nav element.

        Raises:
            AttributeError, IndexError, ValueError: if the pagination markup
                is missing or does not contain a number.
        """
        soup = BeautifulSoup(html, 'lxml')
        nav = soup.find('nav', class_='pagination--page-selector-wrapper')
        buttons = nav.find_all('a', class_='pagination--button')
        total_pages = buttons[-2].find('span').text
        return int(total_pages)

    def get_page_data(self, html):
        """Extract every non-offer product on the page and store it.

        For each ``product-list--list-item`` without text in a
        ``yellow-square`` badge, the title, price and absolute URL are
        collected and handed to ``write_database``. A field that cannot be
        extracted is stored as an empty string.
        """
        soup = BeautifulSoup(html, 'lxml')
        container = soup.find('div', class_='product-lists')
        if container is None:
            # No product list on this page, so there is nothing to store.
            return
        for product in container.find_all('li', class_='product-list--list-item'):
            badge = product.find('div', class_='yellow-square')
            offer = badge.text if badge is not None else ''
            if offer:
                # Products carrying an offer badge are skipped.
                continue

            # Title and URL both come from the same link element.
            details = product.find('div', class_='product-details--content')
            link = None
            if details is not None:
                link = details.find('a', class_='product-tile--title')
            title = link.text if link is not None else ''
            href = link.get('href') if link is not None else None
            url = 'https://www.tesco.com' + href if href is not None else ''

            try:
                price = float(product.find('div', class_='controls').find('span', class_='value').text)
            except (AttributeError, ValueError):
                # Price element missing or not numeric.
                price = ''

            data = {
                'title': title,
                'price': price,
                'url': url
            }
            write_database(data)
def main():
    """Scrape the first two listing pages of Fresh Food and Bakery.

    For each section a banner is printed, then for each page the progress
    percentage is printed, the page is downloaded and its products stored.
    """
    scraper = Tesco()
    sections = (
        ('Записываю Fresh.',
         'https://www.tesco.com/groceries/en-GB/shop/fresh-food/all?offset='),
        ('Записываю Bakery.',
         'https://www.tesco.com/groceries/en-GB/shop/bakery/all?offset='),
    )
    for banner, base_url in sections:
        print(banner)
        for page_index in range(2):
            print((page_index / 2) * 100)
            # The offset advances by 24 per page.
            offset = page_index * 24
            page_html = scraper.get_html(base_url + str(offset))
            scraper.get_page_data(page_html)


# Run the scraper only when executed as a script, not on import.
if __name__ == '__main__':
    main()
Add Comment
Please, Sign In to add comment