Advertisement
Guest User

Untitled

a guest
Sep 27th, 2018
181
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.26 KB | None | 0 0
  1. ; diskover config file
  2. ; if you make any changes, restart worker bots so they get the new config
  3.  
  4. [excludes]
  5. ; directory names and absolute paths you want to exclude from crawl, case-sensitive, can include wildcards (.* or backup* or /dir/dirname* or *tmp or *tmp* etc)
  6. dirs = .*,.snapshot,.Snapshot,.zfs,/data/nfs,/mnt/nfs
  7. ; files you want to exclude from crawl, case-sensitive, can include wildcards (.*, *.doc or NULLEXT for files with no extension)
  8. files = .*,Thumbs.db,.DS_Store,._.DS_Store,.localized,desktop.ini
  9.  
  10. [includes]
  11. ; directory names and absolute paths you want to include (whitelist), case-sensitive, you don't need to whitelist rootdir (-d rootdir)
  12. ;dirs = .recycle
  13. ; files you want to include (whitelist), case-sensitive
  14. ;files =
  15.  
  16. [autotag]
  17. ; pattern dictionaries for diskover bots to use when auto-tagging, values are case-sensitive, can include wildcard for ext, name or path (tmp* or TMP* or *tmp or *TMP* etc)
  18. ;files = [{"name": [], "name_exclude": [], "ext": ["tmp*", "TMP*", "temp*", "TEMP*", "cache*", "CACHE*"], "path": ["*/Application Support/*", "*/Containers/*"], "path_exclude": [], "mtime": 90, "atime": 0, "ctime": 90, "tag": "delete", "tag_custom": "autotag"}]
  19. ;dirs = [{"name": ["*tmp*", "*TMP*", "*temp*", "*TEMP*", "*Temp*", "*cache*", "*CACHE*", "*Cache*"], "name_exclude": ["*templates*", "*Templates*"], "path": ["*/Application Support/*", "*/Containers/*"], "path_exclude": [], "mtime": 90, "atime": 0, "ctime": 90, "tag": "delete", "tag_custom": "autotag"}]
  20.  
  21. [elasticsearch]
  22. ; uncomment the below three lines if you are using AWS ES
  23. ;aws = False
  24. ;host = search-diskover-es-cluster-eg3yztrvzb6qucroyyjk2vokza.ap-northeast-1.es.amazonaws.com
  25. ;port = 443
  26. ; below two lines are for local ES, comment out if you are using AWS ES
  27. host = 192.168.1.222
  28. port = 9200
  29. ; below two lines are for http-auth if you installed X-Pack, comment out if you are not using X-Pack
  30. user = elastic
  31. password = changeme
  32. ; index name for ES, cli arg overwrites this
  33. indexname = diskover-index
  34. ; timeout for connection to ES (default is 10)
  35. timeout = 30
  36. ; number of connections kept open to ES when crawling (default is 10)
  37. maxsize = 20
  38. ; max retries for ES operations (default is 0)
  39. maxretries = 10
  40. ; wait for at least yellow status before bulk uploading (default is False), set to True if you want to wait
  41. wait = False
  42. ; chunk size for ES bulk operations (default is 500)
  43. chunksize = 1000
  44. ; number of shards for index (default is 5)
  45. shards = 1
  46. ; number of replicas for index (default is 1)
  47. replicas = 0
  48. ; the below settings are to optimize ES for crawling
  49. ; index refresh interval (default is 1s), set to -1 to disable refresh during crawl (fastest performance but no index searches), after the crawl it is set back to 1s
  50. indexrefresh = 30s
  51. ; disable replicas during crawl - set to True to turn off replicas or False to keep on (default False), after the crawl it is set back to the replicas value above
  52. disablereplicas = True
  53. ; transaction log flush threshold size (default 512mb)
  54. translogsize = 1gb
  55.  
  56. [redis]
  57. host = 192.168.1.222
  58. port = 6379
  59. ;password =
  60. ; cache directory times in Redis
  61. ; used for -I index2 when comparing directory times to get metadata from index2 instead of off disk
  62. ; set to True to cache dir times or False to turn off (default False)
  63. cachedirtimes = False
  64. ; how long in seconds directory keys live in Redis (default 1 day = 86400; the value below, 604800, is 7 days)
  65. dirtimesttl = 604800
  66.  
  67. [treethreads]
  68. ; number of threads to use for tree walking down directories in rootdir (cores x 2 might be a good start)
  69. threads = 16
  70.  
  71. [adaptivebatch]
  72. ; adaptive batch settings when using -a (intelligent crawling)
  73. ; batch size (number of dirs) to start at
  74. startsize = 50
  75. ; maximum size of batch
  76. maxsize = 500
  77. ; when adjusting batch size use this for +/- (increase when the queue size is > 0, decrease when it is 0)
  78. stepsize = 10
  79.  
  80. [workerbot]
  81. ; enable bot logs (True or False), bot logs will slow down crawl, use for debugging only
  82. botlogs = False
  83. ; log file directory to store worker logs
  84. ; log files are named diskover_bot_worker_<workername>_<time>_log
  85. logfiledir = /tmp
  86. ; time to wait (sec) before starting threads to help scrape file meta for long running rq jobs
  87. filethreadtime = 60
  88.  
  89. [paths]
  90. ; used by diskover socket server
  91. ; path to diskover.py (default is ./diskover.py)
  92. diskoverpath = /app/diskover/diskover.py
  93. ; path to python executable (default is python)
  94. pythonpath = python
  95.  
  96. [socketlistener]
  97. ; hostname and port (TCP) for diskover socket server for remote commands (0.0.0.0 listens on all network interfaces)
  98. host = 0.0.0.0
  99. port = 9999
  100.  
  101. [dupescheck]
  102. ; read size (bytes) for md5 sum check (how many bytes to read in at a time when md5 checking, default 64 KB)
  103. readsize = 65536
  104. ; max size (bytes) of files to check (files larger than this will be skipped, default 1 GB)
  105. maxsize = 1073741824
  106. ; bytes to check at start and end of file before doing md5 sum check (set large enough to account for file header info, default is 64)
  107. checkbytes = 64
  108.  
  109. [crawlbot]
  110. ; continuous scanner
  111. ; time to sleep (seconds) between checking for directory changes
  112. sleeptime = 0.1
  113. ; number of threads for checking directories, setting this to num of cores x2 is a good starting point
  114. threads = 8
  115.  
  116. [gource]
  117. ; should be set to the same value as in diskover-gource.sh
  118. maxfilelag = 0.1
  119.  
  120. [qumulo]
  121. ; Qumulo host
  122. ;cluster = 172.16.129.10
  123. ; Qumulo api user
  124. ;api_user = admin
  125. ; Qumulo api password
  126. ;api_password = admin
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement