Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Automatically created by: slyd
#
# Scrapy settings for a slybot project: rotates the User-Agent header,
# routes requests through an HTTP proxy, and indexes scraped items into
# Elasticsearch. Any value here can be overridden from an optional
# ``local_slybot_settings`` module (see the bottom of this file).
import os

### More comprehensive list can be found at
### http://techpatterns.com/forums/about304.html
# Pool of User-Agent strings.
# NOTE(review): presumably read by scrap.middlewares.RandomUserAgentMiddleware
# -- confirm against that middleware.
USER_AGENT_LIST = [
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.36 Safari/535.7',
    'Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:16.0) Gecko/16.0 Firefox/16.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/534.55.3 (KHTML, like Gecko) Version/5.1.3 Safari/534.53.10',
]

# Local HTTP proxy endpoint.
# NOTE(review): presumably read by scrap.middlewares.ProxyMiddleware -- confirm.
HTTP_PROXY = 'http://127.0.0.1:8123'

# Downloader middleware ordering; mapping a middleware path to None disables it.
DOWNLOADER_MIDDLEWARES = {
    'scrap.middlewares.RandomUserAgentMiddleware': 5,
    'scrap.middlewares.ProxyMiddleware': 410,
    # The stock User-Agent middleware is switched off (None) so that it does
    # not run alongside the random User-Agent middleware registered above.
    'scrapy.contrib.downloadermiddleware.useragent.UserAgentMiddleware': None,
    # NOTE(review): a previous comment here said the compression middleware
    # was disabled "so the actual HTML pages are cached", but no compression
    # middleware entry exists in this dict. Add one if that is still wanted.
}

# Elasticsearch connection and indexing options used by the item pipeline.
ELASTICSEARCH_SERVER = 'localhost'  # If not 'localhost' prepend 'http://'
ELASTICSEARCH_PORT = 9200  # If port 80 leave blank
ELASTICSEARCH_USERNAME = ''
ELASTICSEARCH_PASSWORD = ''
ELASTICSEARCH_INDEX = 'scrapy'
ELASTICSEARCH_TYPE = 'article'
ELASTICSEARCH_UNIQ_KEY = 'url'

# slybot integration.
SPIDER_MANAGER_CLASS = 'slybot.spidermanager.ZipfileSlybotSpiderManager'
EXTENSIONS = {'slybot.closespider.SlybotCloseSpider': 1}
ITEM_PIPELINES = {'scrapyelasticsearch.ElasticSearchPipeline': 100}
SPIDER_MIDDLEWARES = {'slybot.spiderlets.SpiderletsMiddleware': 999}  # as close as possible to spider output
SLYDUPEFILTER_ENABLED = True

# Absolute path of the parent of the directory that contains this file.
PROJECT_ZIPFILE = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))

# Optional per-deployment overrides: every public name defined in
# local_slybot_settings replaces the value above. The module is optional,
# so a missing module is deliberately ignored.
try:
    from local_slybot_settings import *  # noqa: F401,F403
except ImportError:
    pass
Advertisement
Add Comment
Please, Sign In to add comment