# Untitled

# -*- coding: utf-8 -*-

def start_requests(self):
    """Yield one ``Request`` per URL listed in ``urls1.csv``.

    The first column of each CSV row is taken as the URL. Rows that are
    empty, or whose first column is blank, are skipped.

    Yields:
        Request: a request whose callback forwards the original URL string
        to ``self.parse_detail``.
    """
    # ``with`` closes the file once the generator is exhausted instead of
    # leaking the handle.
    with open('urls1.csv') as url_file:
        for row in csv.reader(url_file):
            # csv.reader yields [] for blank lines; row[0] would raise.
            if not row or not row[0].strip():
                continue
            url = row[0].strip()
            # The callback must be a callable. Writing
            # ``self.parse_detail(url)`` here would call the method right
            # away and hand Scrapy a generator object instead.
            # ``url=url`` binds the current value so each request keeps its
            # own URL (avoids the late-binding closure pitfall).
            yield Request(
                url=url,
                callback=lambda response, url=url: self.parse_detail(url),
            )


def parse_detail(self, url):
    """Render a product page in Chrome and yield one item per option.

    Args:
        url: The page URL as a string. An object exposing a ``url``
            attribute (for example a response passed in by a framework
            callback) is also accepted; its ``url`` is used.

    Yields:
        dict: one item per entry found under the ``colorChip optionList``
        element. ``Option`` and ``Real_price`` are specific to the option;
        every other field is page-level and repeated on each item. Nothing
        is yielded when the page exposes no options.
    """
    # Backward-compatible: plain strings have no ``url`` attribute and pass
    # through unchanged.
    url = getattr(url, 'url', url)

    self.driver = webdriver.Chrome('/webdrivers/chromedriver')
    try:
        self.driver.get(url)
        # Fixed pause before reading the page source, presumably to let
        # client-side rendering finish.
        sleep(10)
        sel = Selector(text=self.driver.page_source)
    finally:
        # The selector works on a copy of the HTML, so the browser can be
        # released immediately; otherwise one Chrome process leaks per URL.
        self.driver.quit()

    sub_kor = sel.xpath('//meta[@property="og:title"]/@content').extract()
    sub_en = sel.xpath('//*[@class="section fr"]//*[@class="subTit"]/text()').extract()
    highlight_1 = sel.xpath('//meta[@property="og:description"]/@content').extract()
    main = sel.xpath('//meta[@property="og:image"]/@content').extract()
    category_1 = sel.xpath('//*[@id="locationArea"]/div/a/text()').extract()
    category_2 = sel.xpath('//*[@id="locationArea"]/strong/text()').extract()
    noop_originpirce = sel.xpath('//*[@class="section fr"]//*[@class="realCost"]/text()').extract()
    noop_real_price = sel.xpath('//*[@class="section fr"]//span[@class="cost"]/text()').extract()
    real_price = sel.xpath('//*[@class="colorChip optionList"]//input[@name="cost"]/@value').extract()
    stock_no = sel.xpath('//*[@class="colorChip optionList"]//*[contains(@id, "stock")]/@value').extract()
    options = sel.xpath('//*[@class="colorChip optionList"]//@title').extract()
    brand = sel.xpath('//span[@class="brand"]/text()').extract_first()
    rating = sel.xpath('//*[@class="starArea"]/span/text()').extract()
    description = sel.xpath('//*[@id="proExplain"]//p').extract()
    image_urls = sel.xpath('//*[@class="thumList"]/li/a/img/@src').extract()
    volume = sel.xpath('//*[@id="specInfoLayer"]//tbody/tr[1]/td/text()').extract()
    skin_type = sel.xpath('//*[@id="specInfoLayer"]//tbody/tr[2]').extract()
    expire_date = sel.xpath('//*[@id="specInfoLayer"]//tbody/tr[3]').extract()
    method = sel.xpath('//*[@id="specInfoLayer"]//tbody/tr[4]').extract()
    manufature = sel.xpath('//*[@id="specInfoLayer"]//tbody/tr[5]').extract()
    ingridient = sel.xpath('//*[@id="specInfoLayer"]//tbody/tr[6]').extract()

    # NOTE(review): pages without any option produce no item at all; confirm
    # that dropping them is intended.
    for idx, option in enumerate(options):
        yield {
            # ``option`` is already the idx-th title. Indexing it again
            # would pick a single character and eventually raise IndexError.
            'Option': option,
            'Volume': volume,
            'Skin_type': skin_type,
            'Expire_date': expire_date,
            'Method': method,
            'Manufature': manufature,
            'Url': url,
            'Sub_kor': sub_kor,
            'Sub_en': sub_en,
            'Highlight': highlight_1,
            'Noop_Origin_price': noop_originpirce,
            'Noop_real_price': noop_real_price,
            # Tolerate pages exposing fewer prices than option titles.
            'Real_price': real_price[idx] if idx < len(real_price) else None,
            'Category_1': category_1,
            'Category_2': category_2,
            # NOTE(review): the full stock list is attached to every item;
            # confirm whether a per-option value was intended.
            'Stock_no': stock_no,
            'Description': description,
            'Rating': rating,
            'Ingridient': ingridient,
            'Brand': brand,
            'Image_urls': image_urls,
            'Main': main,
        }


def close(self, reason):
    """Copy the most recently created CSV in the working directory to XLSX.

    The newest ``*.csv`` file (by ``os.path.getctime``) is read row by row
    into a new workbook, which is saved next to it with the extension
    replaced by ``.xlsx``. Rows the worksheet rejects are logged and
    skipped. When no CSV file exists, nothing is written.

    Args:
        reason: Not used by this method.
    """
    csv_paths = glob.glob('*.csv')
    if not csv_paths:
        # max() on an empty sequence raises ValueError; there is simply
        # nothing to convert in that case.
        self.logger.warning('No CSV file found; skipping XLSX export.')
        return

    # NOTE(review): "newest CSV" may be an input file rather than the
    # exported feed if no export was written; confirm this selection.
    csv_file = max(csv_paths, key=os.path.getctime)

    wb = Workbook()
    ws = wb.active

    with open(csv_file, 'r') as f:
        for line_no, row in enumerate(csv.reader(f), 1):
            try:
                ws.append(row)
            except Exception:
                # Best-effort export: keep going, but leave a trace instead
                # of silently discarding the row. Unlike a bare ``except``,
                # this lets KeyboardInterrupt/SystemExit propagate.
                self.logger.warning(
                    'Skipping row %d of %s: could not be written to the worksheet.',
                    line_no, csv_file)

    # splitext strips only the trailing extension; str.replace would also
    # remove any other ".csv" occurring inside the file name.
    wb.save(os.path.splitext(csv_file)[0] + '.xlsx')