# New - Shop
# coding: utf-8
import requests
from lxml import html
from threading import Thread, Lock
from Queue import Queue
from abc import abstractmethod

class MarketSearcher(object):
    """Base class for searching a product in an online shop.

    The class itself holds no shop-specific settings. Its methods read the
    following instance attributes, which a subclass is expected to assign:
    ``_request_params``, ``_not_found_xpath``, ``_code_phrase``,
    ``_pages_xpath``, ``_one_product_url``, ``_one_name_xpath``,
    ``_one_price_xpath``, ``_many_url``, ``_many_name_xpath`` and
    ``_many_price_xpath``.
    """

    # Upper bound on the number of page-download threads started at once.
    _MAX_THREADS = 12

    def __init__(self, product=''):
        """Remember the product to search for.

        :param product: product name, either a UTF-8 byte string or an
            already decoded unicode string. An empty value makes
            ``start`` ask for the name interactively.
        """
        # Decode byte strings only: calling ``decode`` on a unicode object
        # makes Python 2 encode it to ASCII first, which raises for
        # non-ASCII (e.g. Cyrillic) product names.
        if isinstance(product, bytes):
            product = product.decode('utf-8')
        self.product = product
        self._success = None    # parsed first result page, or None
        self.page = None        # number of result pages
        self.result = []        # list of (names, prices) tuples

    def _check_input(self):
        """Prompt on stdin until ``self.product`` is non-empty.

        Does nothing when a product name is already set.
        """
        while not self.product:
            answer = raw_input('Product (type "exit" to exit): ')
            self.product = answer.decode('utf-8')

    def _found_product(self):
        """Run the search request and report whether anything was found.

        On success stores the parsed page in ``self._success`` and the
        final request URL in ``self.url``.

        :return: ``False`` when the "not found" text of the page contains
            ``self._code_phrase``, ``True`` otherwise.
        """
        respond = requests.get(self._request_params, {'find': self.product,
                                                      'sidx': 'price',
                                                      'sord': 'asc'})
        parsed = html.fromstring(respond.text)
        result = ''.join(parsed.xpath(self._not_found_xpath))
        if self._code_phrase in result:
            print('Nothing found')
            self._success = None
            return False
        print('Product found')
        self._success = parsed
        self.url = respond.url
        return True

    def _get_pages_num(self):
        """Determine how many result pages the search produced.

        Takes the largest numeric value selected by ``self._pages_xpath``
        on the first result page. Values that are not integers are skipped
        so that one non-numeric pager label does not discard the real page
        numbers. Falls back to 1 when no number is found.

        :return: the page count, also stored in ``self.page``.
        """
        numbers = []
        for label in self._success.xpath(self._pages_xpath):
            try:
                numbers.append(int(label))
            except ValueError:
                continue
        self.page = max(numbers) if numbers else 1
        return self.page

    def _parse(self, pages, mutex):
        """Download one result page and store its names and prices.

        Takes exactly ONE page number from ``pages`` per call and appends a
        ``(names, prices)`` tuple to ``self.result`` while holding ``mutex``.

        :param pages: queue of page numbers still to be fetched.
        :param mutex: lock guarding ``self.result``.
        """
        page = pages.get()
        respond = requests.get(self._many_url, {'find': self.product,
                                                'page': page, 'sidx': 'price',
                                                'sord': 'asc'})
        parsed = html.fromstring(respond.text)
        name = parsed.xpath(self._many_name_xpath)
        price = parsed.xpath(self._many_price_xpath)
        with mutex:
            self.result.append((name, price))

    def _drain_pages(self, pages, mutex):
        """Thread body: keep calling ``_parse`` until ``pages`` is empty.

        ``_parse`` handles a single page per call, so without this loop a
        search with more pages than threads would lose the remaining pages.
        """
        while True:
            with mutex:
                # Check-and-take under the lock so that another worker
                # cannot grab the last page between empty() and get(),
                # which would leave this thread blocked forever.
                if pages.empty():
                    return
                page = pages.get()
            # Hand the page over in a private queue: this keeps the
            # one-page-per-call contract of ``_parse`` (and of subclass
            # overrides) unchanged.
            handoff = Queue()
            handoff.put(page)
            self._parse(handoff, mutex)

    def _get_result(self):
        """Collect names and prices from the search results.

        If the search landed directly on a single product page
        (``self._one_product_url`` occurs in ``self.url``), the data is
        read from the already parsed page. Otherwise all ``self.page``
        result pages are downloaded by up to ``_MAX_THREADS`` threads.

        NOTE: page tuples are appended in the order the downloads finish,
        which is not necessarily the shop's page order.

        :return: ``self.result``, a list of ``(names, prices)`` tuples.
        """
        if self._one_product_url in self.url:
            name = self._success.xpath(self._one_name_xpath)
            price = self._success.xpath(self._one_price_xpath)
            self.result = [(name, price)]
            return self.result

        pages = Queue()
        for num in range(1, self.page + 1):
            pages.put(num)
        lock = Lock()
        threads = []
        for _ in range(min(self.page, self._MAX_THREADS)):
            thread = Thread(target=self._drain_pages, args=(pages, lock))
            thread.start()
            threads.append(thread)
        for thread in threads:
            thread.join()
        return self.result

    def _output(self):
        """Write the collected results to ``Result.txt``.

        Each entry of ``self.result`` is written as a "Page N" section,
        where N is the entry's position in the list, followed by one
        name/price line per product, encoded as cp1251.
        """
        with open('Result.txt', 'w') as out:
            for number, page in enumerate(self.result):
                out.write('\n\nPage %-100s\t\n\n' % (number + 1))
                for item in zip(*page):
                    line = '%-100s\t%s\n' % (item[0], item[1])
                    # 'replace' keeps one character that cp1251 cannot
                    # represent from aborting the whole output file.
                    out.write(line.encode('cp1251', 'replace'))

    def _end_search(self):
        """Reset the per-search state so the next search starts clean."""
        self.result = []
        self.page = None
        self._success = None
        self.product = ''

    # NOTE: the class does not use ABCMeta, so this decorator does not
    # prevent instantiation; subclasses simply inherit this method.
    @abstractmethod
    def start(self):
        """Run searches in a loop until the user enters ``exit``.

        Each iteration: obtain a product name, search for it, and when
        something is found count the pages, collect the results and write
        them to the output file. The search state is reset afterwards.
        """
        while True:
            self._check_input()
            if self.product == 'exit':
                break
            if self._found_product():
                self._get_pages_num()
                self._get_result()
                self._output()
            self._end_search()


class TopShopSearch(MarketSearcher):
    """Product search for the Top-Shop online shop (top-shop.ru)."""

    def __init__(self, product=''):
        """Set the URLs, XPath selectors and "not found" phrase for Top-Shop.

        :param product: product name passed on to ``MarketSearcher``.
        """
        MarketSearcher.__init__(self, product)

        # Shop root; ``url`` starts out as the root as well.
        home = 'http://www.top-shop.ru/'
        self.market = self.url = home

        # Both the initial search and the per-page requests use this URL.
        search_url = self.market + 'search/'
        self._request_params = search_url
        self._many_url = search_url

        # Detection of an empty search result.
        self._not_found_xpath = '//div[@class="result_text"]/text()'
        self._code_phrase = 'мы не нашли товаров'.decode('utf-8')

        # Pagination.
        self._pages_xpath = '//li[@class=" js_page"]/@data-num'

        # The search landed directly on a single product page.
        self._one_product_url = '/product/'
        self._one_name_xpath = '//body/div[6]/div[2]/div/div/div/div/h1/text()'
        self._one_price_xpath = '//body/div[6]/div[3]/div[3]/div/div[2]/div/div[1]/text()'

        # Product list on a result page.
        tracker = '//span[@class="hidden js_ectrack"]'
        self._many_name_xpath = tracker + '/@data-name'
        self._many_price_xpath = tracker + '/@data-price'


class EldoShopSearch(MarketSearcher):
    """Product search for the Eldorado online shop (eldorado.ru)."""

    def __init__(self, product=''):
        """Set the URLs, XPath selectors and "not found" phrase for Eldorado.

        :param product: product name passed on to ``MarketSearcher``.
        """
        MarketSearcher.__init__(self, product)

        home = 'http://www.eldorado.ru/'
        self.market = self.url = home

        # Initial search request and detection of an empty result.
        self._request_params = self.market + 'search/catalog.php'
        self._not_found_xpath = '//p[@class="paragraph searchInfoTitle"]/text()'
        self._code_phrase = 'ничего не найдено'.decode('utf-8')

        # Pagination.
        self._pages_xpath = '//div[@class="pages"]/a/text()'

        # The search landed directly on a single product page.
        self._one_product_url = '/cat/detail'
        self._one_name_xpath = '//h1[@itemprop="name"]/text()'
        self._one_price_xpath = '//td/span[@itemprop="price"]/text()'

        # Product list on a result page.
        self._many_url = self.market + 'search/catalog.php'
        self._many_name_xpath = '//div[@class="itemDescription"]/div[@class="itemTitle"]/a/text()'
        self._many_price_xpath = '//div[@class="priceContainer"]/div/span[@class="discountPrice itemPrice"]/text()'

    def _parse(self, pages, mutex):
        """Download one result page using Eldorado's query parameters.

        Takes one page number from ``pages`` and appends the page's
        ``(names, prices)`` tuple to ``self.result`` while holding ``mutex``.
        """
        page_number = pages.get()
        query = {'q': self.product,
                 'page': page_number, 'sort': 'price',
                 'type': 'asc',
                 "list_num": 50}
        response = requests.get(self._many_url, query)
        tree = html.fromstring(response.text)
        names = tree.xpath(self._many_name_xpath)
        prices = tree.xpath(self._many_price_xpath)
        with mutex:
            self.result.append((names, prices))

    def _found_product(self):
        """Run the search with Eldorado's query parameters.

        Stores the parsed page in ``self._success`` and the final URL in
        ``self.url`` when something was found.

        :return: ``False`` when the "not found" text of the page contains
            ``self._code_phrase``, ``True`` otherwise.
        """
        query = {'q': self.product,
                 'sort': 'price', 'type': 'asc',
                 'list_num': 50}
        response = requests.get(self._request_params, query)
        tree = html.fromstring(response.text)
        banner = ''.join(tree.xpath(self._not_found_xpath))

        # Guard clause: the shop reported an empty search result.
        if self._code_phrase in banner:
            print('Nothing found')
            self._success = None
            return False

        print('Product found')
        self._success = tree
        self.url = response.url
        return True


if __name__ == '__main__':
    # Executed as a script: run the interactive Top-Shop search loop.
    TopShopSearch().start()