Guest User

Untitled

a guest
Jan 7th, 2018
126
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.97 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2.  
  def start_requests(self):
      """Yield one ``Request`` per URL listed in ``urls1.csv``.

      The first column of every non-empty row is used as the URL. Each
      request's callback forwards that URL (not the downloaded response) to
      ``parse_detail``, whose only parameter is the URL string.

      Yields:
          Request: one request per usable row of the CSV file.
      """
      # The context manager closes the file once the generator is exhausted;
      # the original left the handle open for the life of the process.
      with open('urls1.csv') as url_file:
          for row in csv.reader(url_file):
              # Blank lines come back as empty rows; row[0] would raise
              # IndexError on them, so skip anything without a URL.
              if not row or not row[0].strip():
                  continue
              url = row[0].strip()
              # The callback must be a callable. The original wrote
              # ``callback=self.parse_detail(url)``, which *calls* the method
              # right away and hands Request a generator object instead.
              # ``url=url`` binds the current value so each request keeps its
              # own URL rather than the last one seen by the loop.
              yield Request(
                  url=url,
                  callback=lambda response, url=url: self.parse_detail(url),
              )
  10.  
  11.  
  def parse_detail(self, url):
      """Render a product page in Chrome and yield one item per product option.

      The page is loaded through Selenium, its rendered HTML is parsed with a
      ``Selector``, and a dict is yielded for every ``title`` attribute found
      under the ``colorChip optionList`` element. All other fields are shared
      by every item of the same page.

      Args:
          url: Address of the product page. An object exposing a ``url``
              attribute is accepted too, in which case that attribute is used.

      Yields:
          dict: The scraped fields. ``Option`` is the option title and
          ``Real_price`` the price at the same position in the option list
          (``None`` when the page lists fewer prices than options).
      """
      # Allows the method to be handed a response-like object directly as
      # well as a plain URL string (a str has no ``url`` attribute).
      url = getattr(url, 'url', url)

      self.driver = webdriver.Chrome('/webdrivers/chromedriver')
      try:
          self.driver.get(url)
          # Fixed pause before reading the DOM -- presumably to let the page's
          # scripts finish rendering; TODO confirm an explicit wait is not
          # more appropriate.
          sleep(10)
          page_source = self.driver.page_source
      finally:
          # One browser is launched per call; without quit() every processed
          # URL would leave a Chrome process running.
          self.driver.quit()

      sel = Selector(text=page_source)

      sub_kor = sel.xpath('//meta[@property="og:title"]/@content').extract()
      sub_en = sel.xpath('//*[@class="section fr"]//*[@class="subTit"]/text()').extract()
      highlight_1 = sel.xpath('//meta[@property="og:description"]/@content').extract()
      main = sel.xpath('//meta[@property="og:image"]/@content').extract()
      category_1 = sel.xpath('//*[@id="locationArea"]/div/a/text()').extract()
      category_2 = sel.xpath('//*[@id="locationArea"]/strong/text()').extract()
      noop_originpirce = sel.xpath('//*[@class="section fr"]//*[@class="realCost"]/text()').extract()
      noop_real_price = sel.xpath('//*[@class="section fr"]//span[@class="cost"]/text()').extract()
      real_price = sel.xpath('//*[@class="colorChip optionList"]//input[@name="cost"]/@value').extract()
      stock_no = sel.xpath('//*[@class="colorChip optionList"]//*[contains(@id, "stock")]/@value').extract()
      options = sel.xpath('//*[@class="colorChip optionList"]//@title').extract()
      brand = sel.xpath('//span[@class="brand"]/text()').extract_first()
      rating = sel.xpath('//*[@class="starArea"]/span/text()').extract()
      description = sel.xpath('//*[@id="proExplain"]//p').extract()
      image_urls = sel.xpath('//*[@class="thumList"]/li/a/img/@src').extract()
      volume = sel.xpath('//*[@id="specInfoLayer"]//tbody/tr[1]/td/text()').extract()
      skin_type = sel.xpath('//*[@id="specInfoLayer"]//tbody/tr[2]').extract()
      expire_date = sel.xpath('//*[@id="specInfoLayer"]//tbody/tr[3]').extract()
      method = sel.xpath('//*[@id="specInfoLayer"]//tbody/tr[4]').extract()
      manufature = sel.xpath('//*[@id="specInfoLayer"]//tbody/tr[5]').extract()
      ingridient = sel.xpath('//*[@id="specInfoLayer"]//tbody/tr[6]').extract()

      # NOTE(review): a page with no option titles yields nothing at all --
      # confirm that option-less products are meant to be dropped.
      for idx, option in enumerate(options):
          yield {
              # ``option`` is already the idx-th title; the original
              # ``option[idx]`` picked a single character out of it and
              # raised IndexError for short titles.
              'Option': option,
              'Volume': volume,
              'Skin_type': skin_type,
              'Expire_date': expire_date,
              'Method': method,
              'Manufature': manufature,
              'Url': url,
              'Sub_kor': sub_kor,
              'Sub_en': sub_en,
              'Highlight': highlight_1,
              'Noop_Origin_price': noop_originpirce,
              'Noop_real_price': noop_real_price,
              # Guard against pages listing fewer prices than options.
              'Real_price': real_price[idx] if idx < len(real_price) else None,
              'Category_1': category_1,
              'Category_2': category_2,
              'Stock_no': stock_no,
              'Description': description,
              'Rating': rating,
              'Ingridient': ingridient,
              'Brand': brand,
              'Image_urls': image_urls,
              'Main': main,
          }
  83.  
  84.  
  85.  
  86.  
  def close(self, reason):
      """Copy the newest ``*.csv`` in the working directory into an ``.xlsx`` file.

      The CSV with the greatest ``os.path.getctime`` is read row by row into a
      new workbook, which is saved next to it under the same base name with an
      ``.xlsx`` extension. Rows the worksheet refuses are skipped and logged.

      Args:
          reason: Why the spider is closing; not used by the conversion.
      """
      import logging

      log = logging.getLogger(__name__)

      # NOTE(review): the newest CSV is assumed to be the crawl output --
      # confirm it cannot be an input file such as the URL list.
      csv_files = glob.glob('*.csv')
      if not csv_files:
          # max() on an empty sequence raises ValueError; there is simply
          # nothing to convert in that case.
          log.warning('No CSV file found; skipping XLSX export.')
          return
      csv_file = max(csv_files, key=os.path.getctime)

      wb = Workbook()
      ws = wb.active

      with open(csv_file, 'r') as f:
          for row_number, row in enumerate(csv.reader(f), 1):
              try:
                  ws.append(row)
              except Exception:
                  # Stay best-effort (one bad row must not lose the export),
                  # but record what was dropped instead of hiding it, and do
                  # not trap KeyboardInterrupt/SystemExit as a bare except did.
                  log.warning('Skipping row %d of %s', row_number, csv_file,
                              exc_info=True)

      # splitext removes only the trailing extension; str.replace would also
      # strip any other ".csv" occurring inside the file name.
      wb.save(os.path.splitext(csv_file)[0] + '.xlsx')
Add Comment
Please, Sign In to add comment