Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
def start_requests(self):
    """Yield one request per URL found in the first column of ``urls1.csv``.

    Blank rows and rows whose first cell is empty are skipped. Each request's
    callback forwards the URL string to ``parse_detail``, which takes a URL
    rather than a response object.

    Yields:
        Request: one request for every usable URL in the file.
    """
    # The context manager closes the CSV once the generator is exhausted.
    with open('urls1.csv') as url_file:
        for row in csv.reader(url_file):
            # csv.reader yields [] for blank lines; row[0] would raise there.
            if not row:
                continue
            url = row[0].strip()
            if not url:
                continue
            # The callback must be a callable. Previously the *result* of
            # calling parse_detail(url) (a generator) was passed instead.
            # ``url=url`` binds the current value so every request keeps its
            # own URL rather than the last one read from the file.
            yield Request(
                url=url,
                callback=lambda response, url=url: self.parse_detail(url),
            )
def parse_detail(self, url):
    """Load a product page in Chrome and yield one record per listed option.

    Args:
        url: Address of the page as a string. An object exposing a ``url``
            attribute is accepted as well; that attribute is then used.

    Yields:
        dict: one record per entry found in the page's option list. Every
        record repeats the page-level fields; ``Option`` and ``Real_price``
        are specific to the entry (``Real_price`` is ``None`` when the page
        lists fewer prices than options).
    """
    # Plain strings have no ``url`` attribute, so they pass through unchanged.
    url = getattr(url, 'url', url)

    driver = webdriver.Chrome('/webdrivers/chromedriver')
    # Kept on the instance as before; the browser itself is shut down below.
    self.driver = driver
    try:
        driver.get(url)
        # Fixed wait before reading the DOM - presumably to let the page
        # finish rendering. NOTE(review): this blocks the calling thread.
        sleep(10)
        page_source = driver.page_source
    finally:
        # Everything below works on the captured HTML, so the browser can be
        # released right away instead of leaking one process per URL.
        driver.quit()

    sel = Selector(text=page_source)

    # Page metadata taken from the Open Graph <meta> tags.
    sub_kor = sel.xpath('//meta[@property="og:title"]/@content').extract()
    highlight_1 = sel.xpath('//meta[@property="og:description"]/@content').extract()
    main = sel.xpath('//meta[@property="og:image"]/@content').extract()

    sub_en = sel.xpath('//*[@class="section fr"]//*[@class="subTit"]/text()').extract()
    category_1 = sel.xpath('//*[@id="locationArea"]/div/a/text()').extract()
    category_2 = sel.xpath('//*[@id="locationArea"]/strong/text()').extract()

    # Prices shown in the main product section.
    noop_originpirce = sel.xpath('//*[@class="section fr"]//*[@class="realCost"]/text()').extract()
    noop_real_price = sel.xpath('//*[@class="section fr"]//span[@class="cost"]/text()').extract()

    # Values read from the option list.
    real_price = sel.xpath('//*[@class="colorChip optionList"]//input[@name="cost"]/@value').extract()
    stock_no = sel.xpath('//*[@class="colorChip optionList"]//*[contains(@id, "stock")]/@value').extract()
    options = sel.xpath('//*[@class="colorChip optionList"]//@title').extract()

    brand = sel.xpath('//span[@class="brand"]/text()').extract_first()
    rating = sel.xpath('//*[@class="starArea"]/span/text()').extract()
    description = sel.xpath('//*[@id="proExplain"]//p').extract()
    image_urls = sel.xpath('//*[@class="thumList"]/li/a/img/@src').extract()

    # Rows 1-6 of the table inside #specInfoLayer. Row 1 is taken as cell
    # text; the remaining rows are kept as raw <tr> markup.
    volume = sel.xpath('//*[@id="specInfoLayer"]//tbody/tr[1]/td/text()').extract()
    skin_type = sel.xpath('//*[@id="specInfoLayer"]//tbody/tr[2]').extract()
    expire_date = sel.xpath('//*[@id="specInfoLayer"]//tbody/tr[3]').extract()
    method = sel.xpath('//*[@id="specInfoLayer"]//tbody/tr[4]').extract()
    manufature = sel.xpath('//*[@id="specInfoLayer"]//tbody/tr[5]').extract()
    ingridient = sel.xpath('//*[@id="specInfoLayer"]//tbody/tr[6]').extract()

    # NOTE(review): a page without any option entries yields nothing at all;
    # confirm whether such pages should still produce a record.
    for idx, option in enumerate(options):
        yield {
            # The whole option title. ``option[idx]`` picked a single
            # character and raised IndexError on short titles.
            'Option': option,
            'Volume': volume,
            'Skin_type': skin_type,
            'Expire_date': expire_date,
            'Method': method,
            'Manufature': manufature,
            'Url': url,
            'Sub_kor': sub_kor,
            'Sub_en': sub_en,
            'Highlight': highlight_1,
            'Noop_Origin_price': noop_originpirce,
            'Noop_real_price': noop_real_price,
            # Guard against pages listing fewer prices than options.
            'Real_price': real_price[idx] if idx < len(real_price) else None,
            'Category_1': category_1,
            'Category_2': category_2,
            'Stock_no': stock_no,
            'Description': description,
            'Rating': rating,
            'Ingridient': ingridient,
            'Brand': brand,
            'Image_urls': image_urls,
            'Main': main,
        }
def close(self, reason):
    """Copy the newest ``*.csv`` in the working directory into an ``.xlsx``.

    The file with the greatest ``os.path.getctime`` is chosen, and the
    workbook is saved next to it under the same base name. Rows that the
    worksheet refuses are logged and skipped. When no CSV file exists the
    method logs a warning and returns without writing anything.

    Args:
        reason: Not used by this method.
    """
    import logging

    log = logging.getLogger(__name__)

    csv_paths = glob.glob('*.csv')
    if not csv_paths:
        # max() on an empty sequence would raise ValueError.
        log.warning('No CSV file found; skipping XLSX export.')
        return

    # NOTE(review): this picks whichever CSV is newest, which is presumably
    # the crawl output - confirm it can never be the input URL list.
    csv_file = max(csv_paths, key=os.path.getctime)

    wb = Workbook()
    ws = wb.active
    with open(csv_file, 'r') as f:
        for row in csv.reader(f):
            try:
                ws.append(row)
            except Exception:
                # Best effort: keep converting the remaining rows, but leave
                # a trace instead of silently dropping data.
                log.warning('Skipping row that could not be written: %r',
                            row, exc_info=True)

    # splitext strips only the extension; str.replace would also remove any
    # other '.csv' occurring inside the file name.
    wb.save(os.path.splitext(csv_file)[0] + '.xlsx')
Add Comment
Please, Sign In to add comment