Advertisement
Guest User

Untitled

a guest
Apr 15th, 2016
103
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.46 KB | None | 0 0
  1. ####
  2. ## In settings.py
  3. ####
  4.  
  5. BOT_NAME = 'tutorial'
  6.  
  7. SPIDER_MODULES = ['tutorial.spiders']
  8. NEWSPIDER_MODULE = 'tutorial.spiders'
  9.  
  10. #SPLASH_URL = 'http://192.168.99.100:8050'
  11.  
  12. DOWNLOADER_MIDDLEWARES = {
  13.   'scrapyjs.SplashMiddleware': 725,
  14.   'scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware': 300
  15. }
  16.  
  17. SPLASH_URL = 'http://3prg5eay-splash.scrapinghub.com'
  18.  
  19. DUPEFILTER_CLASS = 'scrapyjs.SplashAwareDupeFilter'
  20. HTTPCACHE_STORAGE = 'scrapyjs.SplashAwareFSCacheStorage'
  21.  
  22. CRAWLERA_ENABLED = True
  23. CRAWLERA_APIKEY = '0ec13d2a4b264e1a85173e0beda6f2eb'
  24. CONCURRENT_REQUESTS = 10
  25. DOWNLOAD_TIMEOUT = 3600
  26.  
  27. ####
  28. ## Spider
  29. ####
  30. import json
  31. import scrapy
  32. import requests
  33. import scrapyjs
  34. from tutorial.items import ProxyTestItem
  35.  
  36. class SplashSpider(scrapy.Spider):
  37.  
  38.     http_user = '5b0056be4659470ab4aaabfaf71239b1'
  39.     http_pass = ''
  40.  
  41.     name = "proxy_test"
  42.     download_delay = 2
  43.     start_urls = [
  44.                     "http://whatismyip.org"
  45.                   ]
  46.  
  47.     def start_requests(self):
  48.         for url in self.start_urls:
  49.             yield scrapy.Request(url, self.parse, meta={
  50.                 'splash': {
  51.                     'endpoint': 'execute',
  52.                     'args': {
  53.                       'url' : url
  54.                     }
  55.                 }
  56.             })
  57.  
  58.     def parse(self, response):
  59.         item = ProxyTestItem()
  60.         item['test'] = response.xpath('/html/body/div[2]/span/text()').extract()
  61.         yield item
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement