Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import scrapy
- from scrapy.loader import ItemLoader
- from JetScrape.items import ProductLoader, JetProduct
- import datetime
- from selenium import webdriver
- import time
class JetSpider(scrapy.Spider):
    """Scrape product price and title from jet.com pages rendered in Chrome.

    Scrapy fetches each start URL, then ``parse`` re-opens the same URL in a
    Selenium-driven Chrome instance and extracts fields from the
    browser-rendered page source instead of from the Scrapy response body.

    Start URLs are read from ``JetURL.txt`` (one URL per line) in the current
    working directory when the class body is executed, i.e. at import time.
    """

    name = "jet"
    allowed_domains = ["jet.com"]

    # Blank lines are skipped: an empty string is not a valid request URL.
    with open("JetURL.txt", "rt") as f:
        start_urls = [url.strip() for url in f if url.strip()]

    def __init__(self, *args, **kwargs):
        """Initialise the spider and start the Chrome WebDriver.

        Positional and keyword arguments are forwarded to ``scrapy.Spider``
        so that arguments supplied by Scrapy (e.g. ``-a key=value``) are
        accepted rather than raising ``TypeError``.
        """
        super(JetSpider, self).__init__(*args, **kwargs)
        self.br = webdriver.Chrome()

    def closed(self, reason):
        """Shut down the browser when Scrapy closes the spider.

        ``quit()`` is used rather than ``close()`` so that the whole
        WebDriver session ends, not just the current window.

        Args:
            reason: Close reason passed in by Scrapy; unused here.
        """
        self.br.quit()

    def parse(self, response):
        """Load ``response.url`` in Chrome and yield one populated item.

        Args:
            response: The Scrapy response; only its ``url`` is used.

        Yields:
            The item produced by ``ProductLoader.load_item()`` with the
            ``jetprice``, ``jettitle`` and ``jetLast_Updated`` fields added.
        """
        self.br.get(response.url)
        # Fixed pause before reading the page source — presumably to let
        # client-side rendering finish. NOTE(review): this blocks the whole
        # crawler for 3 seconds per page; confirm whether an explicit wait
        # on the price element would be preferable.
        time.sleep(3)
        today = datetime.datetime.now()

        # add_xpath() needs a Selector, not a raw HTML string, so wrap the
        # browser-rendered source before handing it to the loader.
        rendered = scrapy.Selector(text=self.br.page_source)
        jetload = ProductLoader(item=JetProduct(), selector=rendered)
        jetload.add_xpath("jetprice", "//span[@class='formatted-value']/text()")
        jetload.add_xpath("jettitle", "//h1[@class='name']/text()")
        jetload.add_value("jetLast_Updated", today)
        yield jetload.load_item()
Add Comment
Please, Sign In to add comment