Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import re
- import time
- from pprint import pprint
- from threading import Thread
- from bs4 import BeautifulSoup
- from requests_html import HTMLSession
def time_track(func):
    """Decorate *func* so that every call reports how long it took.

    The wrapper forwards all positional and keyword arguments to *func*,
    prints a blank line followed by the elapsed time rounded to whole
    seconds, and returns whatever *func* returned. If *func* raises, the
    exception propagates and nothing is printed.
    """
    from functools import wraps

    @wraps(func)  # keep func's __name__/__doc__ visible on the wrapper
    def surrogate(*args, **kwargs):
        # perf_counter is monotonic, so the measurement cannot go negative
        # or jump if the system clock is adjusted while func runs.
        started_at = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = round(time.perf_counter() - started_at)
        print('')
        print(f'Time run func {elapsed} sec.')
        return result

    return surrogate
class test_thread(Thread):
    """Thread that fetches one product page and records what it finds.

    When run, the thread downloads ``url``, extracts the price and the
    product code from the page, and appends a dict with the keys
    ``name``, ``desc``, ``link``, ``price`` and ``code`` to ``array``.

    Args:
        url: Address of the product page to download.
        array: List shared with the caller; the result dict is appended to it.
        name: Product name copied into the result.
        desc: Product description copied into the result.
    """

    def __init__(self, url, array, name, desc):
        super().__init__()
        self.url = url
        self.array = array
        # NOTE: ``name`` is also threading.Thread's ``name`` property, so the
        # product name doubles as the thread's name. Kept for compatibility
        # with code that reads ``thread.name``.
        self.name = name
        self.desc = desc

    @staticmethod
    def _text_or_default(node, default):
        """Return the stripped text of *node*, or *default* if it is None."""
        if node is None:
            return default
        return node.text.strip()

    def run(self):
        """Download the page, parse it and append the product dict."""
        # The context manager closes the session (and its connection pool)
        # even if the request or the parsing raises.
        with HTMLSession() as session:
            response = session.get(self.url)
            page = BeautifulSoup(response.html.html, 'lxml')

        # find() returns None when the element is absent; fall back to the
        # same defaults the script has always used instead of hiding every
        # possible exception behind a bare ``except``.
        price = self._text_or_default(
            page.find("span", class_="current-price-value"), '0')
        code = self._text_or_default(
            page.find("div", class_='price-item-code'), '')

        self.array.append({
            "name": self.name,
            "desc": self.desc,
            "link": self.url,
            "price": price,
            "code": code,
        })
@time_track
def main():
    """Scrape the catalogue page and pretty-print details of every product.

    The catalogue listing is downloaded once. For each product tile that
    has a link, a ``test_thread`` is started to fetch that product's own
    page. After all threads have finished, the collected product dicts
    are printed with ``pprint``.
    """
    from urllib.parse import urljoin

    baseurl = "https://technopoint.ru/"

    # Close the session as soon as the listing has been parsed.
    with HTMLSession() as session:
        response = session.get('https://technopoint.ru/catalog/recipe/e351231ca6161134/2020-goda/')
        page = BeautifulSoup(response.html.html, 'lxml')

    array = []
    threads = []
    for div in page.find_all("div", class_="n-catalog-product__main"):
        anchor = div.find("a", class_="ui-link")
        href = anchor.get("href") if anchor is not None else None
        if not href:
            # A tile without a link cannot be followed; skip it rather than
            # abort the whole run.
            continue

        name = re.sub(r'\s{2,}', ' ', anchor.text.strip()).strip()

        desc_node = div.find("span", class_="product-info__title-description")
        desc = desc_node.text.strip() if desc_node is not None else ''
        desc = re.sub(r'\s{2,}', ' ', desc).strip()

        # Create the worker here so each one receives the name/description
        # of its own tile, not whatever the last tile in the loop left behind.
        # urljoin avoids the "//" that plain concatenation produces when the
        # href already starts with a slash.
        threads.append(test_thread(urljoin(baseurl, href), array, name, desc))

    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

    pprint(array)
# Start scraping only when this file is executed as a script, so that
# importing it from another module has no side effects.
if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement