Guest User

Untitled

a guest
Feb 26th, 2018
110
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.81 KB | None | 0 0
  1. import re
  2. import requests
  3. import mysql.connector
  4. from mysql.connector import MySQLConnection, Error
  5. from bs4 import BeautifulSoup
  6.  
  7.  
  8. class DataBase:
  9. def __init__(self, user, password, db):
  10. self.user = user
  11. self.password = password
  12. self.db = db
  13. try:
  14. conn = self.get_conn()
  15. # if conn.is_connected():
  16. # print('Подключение прошло успешно.')
  17. except Error as e:
  18. print(e)
  19.  
  20. def get_conn(self):
  21. conn = mysql.connector.connect(user=self.user,
  22. password=self.password,
  23. db=self.db)
  24. return conn
  25.  
  26. def add_products(self, title, price, url):
  27. try:
  28. sql_fresh = 'INSERT INTO tesco.fresh (title, price, url) VALUES (%s, %s, %s)'
  29. sql_bakery = 'INSERT INTO tesco.bakery (title, price, url) VALUES (%s, %s, %s)'
  30. args = (title, price, url)
  31. conn = self.get_conn()
  32. c = conn.cursor()
  33. c.execute(sql_fresh, args)
  34. c.execute(sql_bakery, args)
  35. conn.commit()
  36. except Error as e:
  37. print(e)
  38.  
  39.  
  40. def write_database(data):
  41. db = DataBase('root', '123456', 'tesco')
  42. db.add_products(data['title'], data['price'], data['url'])
  43.  
  44.  
  45. class Tesco:
  46. def get_html(self, url):
  47. r = requests.get(url)
  48. return r.text
  49.  
  50. def get_total_pages(self, html):
  51. soup = BeautifulSoup(html, 'lxml')
  52. total_pages =
  53. soup.find('nav', class_='pagination--page-selector-wrapper').find_all('a', class_='pagination--button')[
  54. -2].find('span').text
  55.  
  56. return int(total_pages)
  57.  
  58. def get_page_data(self, html):
  59. soup = BeautifulSoup(html, 'lxml')
  60. products = soup.find('div', class_='product-lists').find_all('li', class_='product-list--list-item')
  61.  
  62. for product in products:
  63. try:
  64. offer = product.find('div', class_='yellow-square').text
  65. except:
  66. offer = ''
  67.  
  68. if len(offer) == 0:
  69. try:
  70. title = product.find('div', class_='product-details--content').find('a',
  71. class_='product-tile--title').text
  72. except:
  73. title = ''
  74.  
  75. try:
  76. price = float(product.find('div', class_='controls').find('span', class_='value').text)
  77. except:
  78. price = ''
  79.  
  80. try:
  81. url = 'https://www.tesco.com' + product.find('div', class_='product-details--content').find('a',
  82. class_='product-tile--title').get(
  83. 'href')
  84. except:
  85. url = ''
  86.  
  87. data = {
  88. 'title': title,
  89. 'price': price,
  90. 'url': url
  91. }
  92.  
  93. write_database(data)
  94.  
  95.  
  96. def main():
  97. tesco = Tesco()
  98.  
  99. base_url_fresh = 'https://www.tesco.com/groceries/en-GB/shop/fresh-food/all?offset='
  100. base_url_bakery = 'https://www.tesco.com/groceries/en-GB/shop/bakery/all?offset='
  101.  
  102. print('Записываю Fresh.')
  103. for i in range(0, 2):
  104. print((i / 2) * 100)
  105. current_page = i * 24
  106. url_gen = base_url_fresh + str(current_page)
  107. html = tesco.get_html(url_gen)
  108. tesco.get_page_data(html)
  109.  
  110. print('Записываю Bakery.')
  111. for i in range(0, 2):
  112. print((i / 2) * 100)
  113. current_page = i * 24
  114. url_gen = base_url_bakery + str(current_page)
  115. html = tesco.get_html(url_gen)
  116. tesco.get_page_data(html)
  117.  
  118.  
  119. if __name__ == '__main__':
  120. main()
Add Comment
Please, Sign In to add comment