Guest User

Untitled

a guest
Sep 18th, 2014
359
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.57 KB | None | 0 0
  1. # Automatically created by: slyd
  2. import os
  3.  
  4. ### More comprehensive list can be found at
  5. ### http://techpatterns.com/forums/about304.html
  6. USER_AGENT_LIST = [
  7. 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.36 Safari/535.7',
  8. 'Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:16.0) Gecko/16.0 Firefox/16.0',
  9. 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/534.55.3 (KHTML, like Gecko) Version/5.1.3 Safari/534.53.10'
  10. ]
  11. HTTP_PROXY = 'http://127.0.0.1:8123'
  12. DOWNLOADER_MIDDLEWARES = {
  13. 'scrap.middlewares.RandomUserAgentMiddleware': 5,
  14. 'scrap.middlewares.ProxyMiddleware': 410,
  15. 'scrapy.contrib.downloadermiddleware.useragent.UserAgentMiddleware': None
  16. # Disable compression middleware, so the actual HTML pages are cached
  17. }
  18.  
  19.  
  20. ELASTICSEARCH_SERVER = 'localhost' # If not 'localhost' prepend 'http://'
  21. ELASTICSEARCH_PORT = 9200 # If port 80 leave blank
  22. ELASTICSEARCH_USERNAME = ''
  23. ELASTICSEARCH_PASSWORD = ''
  24. ELASTICSEARCH_INDEX = 'scrapy'
  25. ELASTICSEARCH_TYPE = 'article'
  26. ELASTICSEARCH_UNIQ_KEY = 'url'
  27.  
  28. SPIDER_MANAGER_CLASS = 'slybot.spidermanager.ZipfileSlybotSpiderManager'
  29. EXTENSIONS = {'slybot.closespider.SlybotCloseSpider': 1}
  30. ITEM_PIPELINES = {'scrapyelasticsearch.ElasticSearchPipeline': 100}
  31. SPIDER_MIDDLEWARES = {'slybot.spiderlets.SpiderletsMiddleware': 999} # as close as possible to spider output
  32. SLYDUPEFILTER_ENABLED = True
  33.  
  34. PROJECT_ZIPFILE = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
  35.  
  36. try:
  37. from local_slybot_settings import *
  38. except ImportError:
  39. pass
Advertisement
Add Comment
Please, Sign In to add comment