Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env ruby
- # Copyright (c) 2011 Fotonauts
- # All rights reserved.
- # Redistribution and use in source and binary forms, with or without
- # modification, are permitted provided that the following conditions are met:
- #
- # * Redistributions of source code must retain the above copyright
- # notice, this list of conditions and the following disclaimer.
- # * Redistributions in binary form must reproduce the above copyright
- # notice, this list of conditions and the following disclaimer in the
- # documentation and/or other materials provided with the distribution.
- # * Neither the name of the University of California, Berkeley nor the
- # names of its contributors may be used to endorse or promote products
- # derived from this software without specific prior written permission.
- #
- # THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
- # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- # DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
- # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- require 'rubygems'
- require 'mongo'
- require 'bson'
# This script checks the replica set state and reports if the lag is too high
# on the given host.
# You can customise these values (lag thresholds, in seconds):
WARNING = 400
CRITICAL = 600

# You can call this script:
# In nagios context, as a probe from the nagios server to the mongo node:
#   ./check_mongo_replica_member myservername.fqdn.com 27100
#
# In munin context by creating a symlink to this file:
#   ln -s check_mongo_replica_member mongolag-27100
#
# It will then behave as a munin probe:
#   ./mongolag-27100 config
#     returns the configuration
#   ./mongolag-27100
#     returns the data for munin
#
# Exit status when not answering munin: 0 (OK), 1 (WARNING), 2 (CRITICAL).

# Messages collected during the check. A non-empty list decides the final
# status; CRITICAL takes precedence over WARNING.
err_msg = []
warn_msg = []

hostname = 'localhost'
port = nil
munin = false

if ARGV.length == 2
  # Nagios probing: <hostname> <port>
  hostname = ARGV[0]
  port = ARGV[1]
end

if port.nil? || port == "config"
  # Munin context: the port is taken from the name the script was invoked as.
  match_data = $0.match(/mongolag-([0-9]+)$/)
  unless match_data
    # Explain the failure instead of exiting silently; exit status is unchanged.
    $stderr.puts "Usage: #{$0} <hostname> <port> (or invoke through a symlink named mongolag-<port>)"
    exit 2
  end
  port = match_data[1].to_i

  if ARGV.length == 1 && ARGV[0] == "config"
    # Munin "config"
    cname = "shard_#{port}"
    puts "graph_title Shard #{port} lag to master"
    puts 'graph_vlabel Seconds'
    puts 'graph_category mongodb'
    puts "#{cname}.label Lag for shard @ #{port}"
    puts "#{cname}.warning #{WARNING}"
    puts "#{cname}.critical #{CRITICAL}"
    exit 0
  end

  # Munin probe
  munin = true
end

begin
  # The munin path already yields an Integer port; normalise the Nagios
  # argument the same way and reject anything that is not a plain number.
  unless port.to_s =~ /\A[0-9]+\z/
    raise ArgumentError, "invalid port #{port.inspect}"
  end
  port = port.to_i

  m = Mongo::Connection.new(hostname, port, {:slave_ok => true})
  rs_status = m.db('admin').command({:replSetGetStatus => 1})
  set = rs_status["set"]

  # States 4 and 8 are the ones this check reports as Fatal or Down.
  if [4, 8].include?(rs_status["myState"])
    err_msg << "mongo @ #{port} has Fatal or Down state."
  end

  master_optime = nil
  slave_optime = nil

  (rs_status["members"] || []).each do |member|
    # Find master optime (state 1)
    master_optime = member["optime"].seconds if member["state"] == 1

    # Only the entry describing the probed node itself is inspected further.
    next unless member["self"]

    my_optime = member["optime"].seconds
    # Take lower optime
    slave_optime = my_optime if slave_optime.nil? || slave_optime > my_optime

    if member["health"] != 1
      warn_msg << "Replica set #{set}. Server: #{member["name"]}. Error: #{member["errmsg"]}"
    end

    # Ignore Masters, Slaves and Arbiters, report other states
    unless [1, 2, 7].include?(member["state"])
      warn_msg << "Member #{member["name"]} is #{member["stateStr"]}."
    end
  end

  if master_optime.nil?
    # Without a master there is nothing to measure the lag against; report it
    # explicitly rather than failing on nil arithmetic.
    err_msg << "No master found in replica set #{set}, cannot compute lag"
  elsif slave_optime.nil?
    err_msg << "mongo @ #{port} not found among members of replica set #{set}, cannot compute lag"
  else
    # A node that is ahead of the master is reported as having no lag.
    delta = [master_optime - slave_optime, 0].max

    if munin
      puts "shard_#{port}.value #{delta}"
      exit 0
    end

    warn_msg << "Skew is getting high (#{delta}s)" if delta > WARNING
    err_msg << "Skew is too high (#{delta}s)" if delta > CRITICAL
  end
rescue StandardError => e
  # StandardError only: SystemExit raised by the `exit` calls above and
  # signals propagate normally instead of being reported as a probe failure.
  err_msg << e.to_s
  err_msg << "while attempting to connect @ #{port}"
end

unless err_msg.empty?
  puts "CRITICAL " + err_msg.join(", ")
  exit 2
end

unless warn_msg.empty?
  puts "WARNING " + warn_msg.join(", ")
  exit 1
end

puts "OK\n"
Add Comment
Please, Sign In to add comment