Guest User

Untitled

a guest
Nov 17th, 2017
62
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.92 KB | None | 0 0
  1. import scrapy
  2. from scrapy.loader import ItemLoader
  3. from JetScrape.items import ProductLoader, JetProduct
  4. import datetime
  5. from selenium import webdriver
  6. import time
  7.  
  8.  
  9.  
  10. class JetSpider(scrapy.Spider):
  11. name = "jet"
  12. allowed_domains = ["jet.com"]
  13. with open("JetURL.txt", "rt") as f:
  14. start_urls = [url.strip() for url in f.readlines()]
  15.  
  16. def __init__(self):
  17. scrapy.Spider.__init__(self)
  18. self.br = webdriver.Chrome()
  19.  
  20. def _del_(self):
  21. self.br.close()
  22.  
  23. def parse(self, response):
  24. self.br.get(response.url)
  25. time.sleep(3)
  26. Today = datetime.datetime.now()
  27. jetload = ProductLoader(item=JetProduct(), selector=self.br.page_source)
  28. jetload.add_xpath("jetprice", "//span[@class='formatted-value']/text()")
  29. jetload.add_xpath("jettitle", "//h1[@class='name']/text()")
  30. jetload.add_value("jetLast_Updated", Today)
  31. yield jetload.load_item()
Add Comment
Please, Sign In to add comment