Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ; diskover config file
- ; if you make any changes, restart worker bots so they get the new config
- [excludes]
- ; directory names and absolute paths you want to exclude from crawl, case-sensitive, can include wildcards (.* or backup* or /dir/dirname* or *tmp or *tmp* etc)
- dirs = .*,.snapshot,.Snapshot,.zfs,/data/nfs,/mnt/nfs
- ; files you want to exclude from crawl, case-sensitive, can include wildcards (.*, *.doc or NULLEXT for files with no extension)
- files = .*,Thumbs.db,.DS_Store,._.DS_Store,.localized,desktop.ini
- [includes]
- ; directory names and absolute paths you want to include (whitelist), case-sensitive, you don't need to whitelist rootdir (-d rootdir)
- ;dirs = .recycle
- ; files you want to include (whitelist), case-sensitive
- ;files =
- [autotag]
- ; pattern dictionaries for diskover bots to use when auto-tagging, values are case-sensitive, can include wildcard for ext, name or path (tmp* or TMP* or *tmp or *TMP* etc)
- ;files = [{"name": [], "name_exclude": [], "ext": ["tmp*", "TMP*", "temp*", "TEMP*", "cache*", "CACHE*"], "path": ["*/Application Support/*", "*/Containers/*"], "path_exclude": [], "mtime": 90, "atime": 0, "ctime": 90, "tag": "delete", "tag_custom": "autotag"}]
- ;dirs = [{"name": ["*tmp*", "*TMP*", "*temp*", "*TEMP*", "*Temp*", "*cache*", "*CACHE*", "*Cache*"], "name_exclude": ["*templates*", "*Templates*"], "path": ["*/Application Support/*", "*/Containers/*"], "path_exclude": [], "mtime": 90, "atime": 0, "ctime": 90, "tag": "delete", "tag_custom": "autotag"}]
- [elasticsearch]
- ; uncomment the below three lines (and set aws to True) if you are using AWS ES
- ;aws = False
- ;host = search-diskover-es-cluster-eg3yztrvzb6qucroyyjk2vokza.ap-northeast-1.es.amazonaws.com
- ;port = 443
- ; below two lines are for local ES, comment out if you are using AWS ES
- host = 192.168.1.222
- port = 9200
- ; the below two lines are for http-auth if you installed X-Pack, comment them out if you are not using X-Pack
- user = elastic
- password = changeme
- ; index name for ES, cli arg overwrites this
- indexname = diskover-index
- ; timeout for connection to ES (default is 10)
- timeout = 30
- ; number of connections kept open to ES when crawling (default is 10)
- maxsize = 20
- ; max retries for ES operations (default is 0)
- maxretries = 10
- ; wait for at least yellow status before bulk uploading (default is False), set to True if you want to wait
- wait = False
- ; chunk size for ES bulk operations (default is 500)
- chunksize = 1000
- ; number of shards for index (default is 5)
- shards = 1
- ; number of replicas for index (default is 1)
- replicas = 0
- ; the below settings are to optimize ES for crawling
- ; index refresh interval (default is 1s), set to -1 to disable refresh during crawl (fastest performance but no index searches), after the crawl it is set back to 1s
- indexrefresh = 30s
- ; disable replicas during crawl - set to True to turn off replicas or False to keep them on (default False), after the crawl replicas are set back to the replicas value above
- disablereplicas = True
- ; transaction log flush threshold size (default 512mb)
- translogsize = 1gb
- [redis]
- host = 192.168.1.222
- port = 6379
- ;password =
- ; cache directory times in Redis
- ; used for -I index2 when comparing directory times to get metadata from index2 instead of off disk
- ; set to True to cache dir times or False to turn off (default False)
- cachedirtimes = False
- ; how long (in seconds) directory keys live in Redis (default is 86400, i.e. 1 day)
- dirtimesttl = 604800
- [treethreads]
- ; number of threads to use for tree walking down directories in rootdir (cores x 2 might be a good start)
- threads = 16
- [adaptivebatch]
- ; adaptive batch settings when using -a (intelligent crawling)
- ; batch size (number of dirs) to start at
- startsize = 50
- ; maximum size of batch
- maxsize = 500
- ; step size used to adjust the batch size up or down (increase when the queue size is > 0, decrease when it is 0)
- stepsize = 10
- [workerbot]
- ; enable bot logs (True or False), bot logs will slow down crawl, use for debugging only
- botlogs = False
- ; log file directory to store worker logs
- ; log files are named diskover_bot_worker_<workername>_<time>_log
- logfiledir = /tmp
- ; time to wait (sec) before starting threads to help scrape file meta for long running rq jobs
- filethreadtime = 60
- [paths]
- ; used by diskover socket server
- ; path to diskover.py (default is ./diskover.py)
- diskoverpath = /app/diskover/diskover.py
- ; path to python executable (default is python)
- pythonpath = python
- [socketlistener]
- ; hostname and port (TCP) for diskover socket server for remote commands
- host = 0.0.0.0
- port = 9999
- [dupescheck]
- ; read size (bytes) for md5 sum check (how many bytes to read in at a time when md5 checking, default 64 KB)
- readsize = 65536
- ; max size (bytes) of files to check (files larger than this will be skipped, default 1 GB)
- maxsize = 1073741824
- ; bytes to check at start and end of file before doing md5 sum check (set large enough to account for file header info, default is 64)
- checkbytes = 64
- [crawlbot]
- ; continuous scanner
- ; time to sleep (seconds) between checking for directory changes
- sleeptime = 0.1
- ; number of threads for checking directories, setting this to num of cores x2 is a good starting point
- threads = 8
- [gource]
- ; should be set to the same value as in diskover-gource.sh
- maxfilelag = 0.1
- [qumulo]
- ; Qumulo host
- ;cluster = 172.16.129.10
- ; Qumulo api user
- ;api_user = admin
- ; Qumulo api password
- ;api_password = admin
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement