Guest User

Untitled

a guest
Jan 23rd, 2018
66
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.61 KB | None | 0 0
  1. #!/usr/bin/env ruby
  2.  
  3. # Copyright (c) 2011 Fotonauts
  4. # All rights reserved.
  5. # Redistribution and use in source and binary forms, with or without
  6. # modification, are permitted provided that the following conditions are met:
  7. #
  8. # * Redistributions of source code must retain the above copyright
  9. # notice, this list of conditions and the following disclaimer.
  10. # * Redistributions in binary form must reproduce the above copyright
  11. # notice, this list of conditions and the following disclaimer in the
  12. # documentation and/or other materials provided with the distribution.
  13. # * Neither the name of the University of California, Berkeley nor the
  14. # names of its contributors may be used to endorse or promote products
  15. # derived from this software without specific prior written permission.
  16. #
  17. # THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
  18. # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  19. # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  20. # DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
  21. # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  22. # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  23. # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  24. # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  25. # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  26. # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27.  
  28. require 'rubygems'
  29. require 'mongo'
  30. require 'bson'
  31.  
  32. # This script checks the cluster state and report if lag is too high on the given host
  33. # you can customise these values:
  34. WARNING = 400
  35. CRITICAL = 600
  36.  
  37. # You can call this script:
  38. # In nagios context, as a probe from the nagios server to the mongo node:
  39. # ./check_mongo_replica_member myservername.fqdn.com 27100
  40. #
  41. # In munin context by creating a symlink to this file:
  42. # ln -s check_mongo_replica_member mongolag-27100
  43. #
  44. # It will then behave as a munin probe:
  45. # ./mongolag-27100 config
  46. # returns the configuration
  47. # ./mongolag-27100
  48. # returns the data for munin
  49.  
  50. err = false
  51. warn= false
  52. err_msg = []
  53. warn_msg = []
  54.  
  55. hostname = 'localhost'
  56. port = nil
  57. munin = false
  58.  
  59. if ARGV.length == 2
  60. # Nagios probing
  61. hostname = ARGV[0]
  62. port = ARGV[1]
  63. end
  64. if port == "config" || port == nil
  65. # get parameters
  66. match_data = $0.match(/mongolag-([0-9]+)$/)
  67. exit(2) unless match_data
  68.  
  69. port = match_data[1].to_i
  70.  
  71. if ARGV.length == 1 && ARGV[0] == "config"
  72. # Munin "Config"
  73. puts "graph_title Shard #{port} lag to master"
  74. puts 'graph_vlabel Seconds'
  75. puts 'graph_category mongodb'
  76. cname = "shard_#{port}"
  77. puts "#{cname}.label Lag for shard @ #{port}"
  78. puts "#{cname}.warning #{WARNING}"
  79. puts "#{cname}.critical #{CRITICAL}"
  80.  
  81. exit 0
  82. else
  83. # Munit probe
  84. munin = true
  85. end
  86. end
  87.  
  88.  
  89. begin
  90.  
  91. m = Mongo::Connection.new(hostname, port, {:slave_ok => true})
  92. admin = m.db('admin')
  93.  
  94.  
  95. rs_status = admin.command({:replSetGetStatus => 1})
  96. set = rs_status["set"]
  97. myStatus = rs_status["myState"]
  98.  
  99. if myStatus == 4 || myStatus == 8
  100. err = true
  101. err_msg << "mongo @ #{port} has Fatal or Down state."
  102. end
  103.  
  104. master_optime = nil
  105. slave_optime = nil
  106.  
  107. rs_status["members"].each do |member|
  108. # Find master optime
  109. if member["state"] == 1
  110. master_optime = member["optime"].seconds
  111. end
  112. # Is it me ?
  113. if member["self"] == true
  114. my_optime = member["optime"].seconds
  115. slave_optime ||= my_optime
  116. # Take lower optime
  117. slave_optime = my_optime if slave_optime > my_optime
  118.  
  119. if not member["health"] == 1
  120. warn =true
  121. warn_msg << "Replica set #{set}. Server: #{member["name"]}. Error: #{member["errmsg"]}"
  122. end
  123. # Ignore Masters, Slaves and Arbiters, report other states
  124. if not [1,2,7].include?(member["state"])
  125. warn = true
  126. warn_msg << "Member #{member["name"]} is #{member["stateStr"]}."
  127. end
  128. end
  129. end
  130.  
  131. delta = (master_optime - slave_optime)
  132. delta = 0 if delta < 0
  133.  
  134. if munin
  135. cname = "shard_#{port}"
  136. puts "#{cname}.value #{delta}"
  137. exit 0
  138. end
  139.  
  140. if delta > WARNING
  141. warn = true
  142. warn_msg << "Skew is getting high (#{delta}s)"
  143. end
  144.  
  145. if delta > CRITICAL
  146. err = true
  147. err_msg << "Skew is too high (#{delta}s)"
  148. end
  149.  
  150. rescue SystemExit => e
  151. exit e.status
  152.  
  153. rescue Exception => e
  154. err = true
  155. err_msg << e.to_s
  156. err_msg << "while attempting to connect @ #{port}"
  157. end
  158.  
  159. if err
  160. puts "CRITICAL "+err_msg.join(", ")
  161. Process.exit 2
  162. end
  163. if warn
  164. puts "WARNING "+warn_msg.join(", ")
  165. Process.exit 1
  166. end
  167.  
  168. puts "OK\n"
Add Comment
Please, Sign In to add comment