Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
class TuiLoader(XPathItemLoader):
    """Item loader whose default output processor is ``TakeFirst()``."""

    # NOTE(review): Scrapy 1.4 reports XPathItemLoader as deprecated and asks
    # subclasses to inherit from scrapy.loader.ItemLoader instead; switch the
    # base class once that name is imported in this module.
    default_output_processor = TakeFirst()


class bookspider(CrawlSpider):
    """Crawl the TUI Bulgaria tour-search page and build ``TuiItem`` objects.

    Links matching the rule's ``allow`` pattern are fetched and handed to
    ``parse_item``.
    """

    name = 'booking_spider'

    # Scrapy reads its seed requests from ``start_urls`` (plural). The
    # previous ``start_url`` attribute was ignored, so the spider opened and
    # closed without fetching a single page.
    # NOTE(review): the path contains a doubled "//ToursSearch" segment --
    # confirm this is the intended URL.
    start_urls = [
        'https://www.tui.ru/ToursSearch//ToursSearch/Europe/Bulgaria.aspx',
    ]

    # ``allowed_domains`` takes bare domain names, not URLs: an entry with a
    # scheme or path never matches a request host, so every followed link
    # would be dropped as offsite. Path restriction is handled by the rule.
    allowed_domains = ['www.tui.ru']

    rules = (
        Rule(
            LinkExtractor(allow='/ToursSearch/Europe/Bulgaria.aspx'),
            callback='parse_item',
        ),
    )

    def parse_item(self, response):
        """Build a ``TuiItem`` from the text of the ``resultsHeader`` element.

        Args:
            response: the downloaded page passed in by the crawl rule.

        Returns:
            A ``TuiItem`` whose ``costs``, ``name``, ``nights`` and
            ``country`` fields each hold the list of extracted text nodes.
        """
        # NOTE(review): all four fields are read from the same XPath, so they
        # receive identical values -- presumably a placeholder; confirm the
        # real selectors against the page markup.
        header_text_xpath = "//*[@id='resultsHeader']/text()"

        item = TuiItem()
        # The item declares the field as ``costs``; assigning to ``cost``
        # raises KeyError because Scrapy items reject undeclared fields.
        item['costs'] = response.xpath(header_text_xpath).extract()
        item['name'] = response.xpath(header_text_xpath).extract()
        item['nights'] = response.xpath(header_text_xpath).extract()
        item['country'] = response.xpath(header_text_xpath).extract()
        return item
class TuiItem(scrapy.Item):
    """Scrapy item describing one tour record.

    Declares five fields: ``url``, ``name``, ``costs``, ``nights`` and
    ``country``.
    """

    # Field declarations only; no per-field metadata is attached.
    url = scrapy.Field()
    name = scrapy.Field()
    costs = scrapy.Field()
    nights = scrapy.Field()
    country = scrapy.Field()
- /home/garcia/tutorial/tutorial/spiders/booking_spider.py:19: ScrapyDeprecationWarning: tutorial.spiders.booking_spider.TuiLoader inherits from deprecated class scrapy.loader.XPathItemLoader, please inherit from scrapy.loader.ItemLoader. (warning only on first subclass, there may be others)
- class TuiLoader(XPathItemLoader):
- 2017-08-20 02:38:29 [scrapy.utils.log] INFO: Scrapy 1.4.0 started (bot: tutorial)
- 2017-08-20 02:38:29 [scrapy.utils.log] INFO: Overridden settings: {'BOT_NAME': 'tutorial', 'NEWSPIDER_MODULE': 'tutorial.spiders', 'ROBOTSTXT_OBEY': True, 'SPIDER_MODULES': ['tutorial.spiders']}
- 2017-08-20 02:38:29 [scrapy.middleware] INFO: Enabled extensions:
- ['scrapy.extensions.corestats.CoreStats',
- 'scrapy.extensions.telnet.TelnetConsole',
- 'scrapy.extensions.memusage.MemoryUsage',
- 'scrapy.extensions.logstats.LogStats']
- 2017-08-20 02:38:29 [scrapy.middleware] INFO: Enabled downloader middlewares:
- ['scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware',
- 'scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware',
- 'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware',
- 'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware',
- 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware',
- 'scrapy.downloadermiddlewares.retry.RetryMiddleware',
- 'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware',
- 'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware',
- 'scrapy.downloadermiddlewares.redirect.RedirectMiddleware',
- 'scrapy.downloadermiddlewares.cookies.CookiesMiddleware',
- 'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware',
- 'scrapy.downloadermiddlewares.stats.DownloaderStats']
- 2017-08-20 02:38:29 [scrapy.middleware] INFO: Enabled spider middlewares:
- ['scrapy.spidermiddlewares.httperror.HttpErrorMiddleware',
- 'scrapy.spidermiddlewares.offsite.OffsiteMiddleware',
- 'scrapy.spidermiddlewares.referer.RefererMiddleware',
- 'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware',
- 'scrapy.spidermiddlewares.depth.DepthMiddleware']
- 2017-08-20 02:38:29 [scrapy.middleware] INFO: Enabled item pipelines:
- []
- 2017-08-20 02:38:29 [scrapy.core.engine] INFO: Spider opened
- 2017-08-20 02:38:29 [scrapy.extensions.logstats] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min)
- 2017-08-20 02:38:29 [scrapy.extensions.telnet] DEBUG: Telnet console listening on 127.0.0.1:6024
- 2017-08-20 02:38:29 [scrapy.core.engine] INFO: Closing spider (finished)
- 2017-08-20 02:38:29 [scrapy.statscollectors] INFO: Dumping Scrapy stats:
- {'finish_reason': 'finished',
- 'finish_time': datetime.datetime(2017, 8, 19, 23, 38, 29, 209335),
- 'log_count/DEBUG': 1,
- 'log_count/INFO': 7,
- 'memusage/max': 49655808,
- 'memusage/startup': 49655808,
- 'start_time': datetime.datetime(2017, 8, 19, 23, 38, 29, 204191)}
- 2017-08-20 02:38:29 [scrapy.core.engine] INFO: Spider closed (finished)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement