Advertisement
Guest User

Untitled

a guest
Apr 7th, 2020
132
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.25 KB | None | 0 0
  1. import re
  2. import time
  3. from pprint import pprint
  4. from threading import Thread
  5. from bs4 import BeautifulSoup
  6. from requests_html import HTMLSession
  7.  
  8.  
  9. def time_track(func):
  10.     def surrogate(*args, **kwargs):
  11.         started_at = time.time()
  12.         result = func(*args, **kwargs)
  13.         ended_at = time.time()
  14.         elapsed = round(ended_at - started_at)
  15.         print('')
  16.         print(f'Time run func {elapsed} sec.')
  17.         return result
  18.     return surrogate
  19.  
  20.  
  21. class test_thread(Thread):
  22.  
  23.     def __init__(self, url, array, name, desc):
  24.         super().__init__()
  25.         self.url = url
  26.         self.array = array
  27.         self.name = name
  28.         self.desc = desc
  29.  
  30.     def run(self):
  31.         session = HTMLSession()
  32.         r = session.get(self.url)
  33.         page = BeautifulSoup(r.html.html, 'lxml')
  34.         try:
  35.             price = page.find("span", class_="current-price-value").text.strip()
  36.         except:
  37.             price = '0'
  38.         try:
  39.             code = page.find("div", class_='price-item-code').text.strip()
  40.         except:
  41.             code = ''
  42.         product = {}
  43.         product["name"] = self.name
  44.         product["desc"] = self.desc
  45.         product["link"] = self.url
  46.         product["price"] = price
  47.         product["code"] = code
  48.         self.array.append(product)
  49.  
  50.  
  51. @time_track
  52. def main():
  53.     session = HTMLSession()
  54.     r = session.get('https://technopoint.ru/catalog/recipe/e351231ca6161134/2020-goda/')
  55.     page = BeautifulSoup(r.html.html, 'lxml')
  56.     divs = page.find_all("div", class_="n-catalog-product__main")
  57.     baseurl = "https://technopoint.ru/"
  58.     threads = []
  59.     links = []
  60.     array = []
  61.     for div in divs:
  62.         href = baseurl + div.find("a", class_="ui-link").get("href")
  63.         links.append(href)
  64.         name = div.find("a", class_="ui-link").text.strip()
  65.         name = re.sub(r'\s{2,}', ' ', name).strip()
  66.         desc = div.find("span", class_="product-info__title-description").text.strip()
  67.         desc = re.sub(r'\s{2,}', ' ', desc).strip()
  68.     threads = [test_thread(link, array, name, desc) for link in links]
  69.     for thread in threads:
  70.         thread.start()
  71.     for thread in threads:
  72.         thread.join()
  73.     pprint(array)
  74.  
  75.  
  76. if __name__ == '__main__':
  77.     main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement