-
-
Save chrisboulton/11337032 to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby | |
# | |
# Nagios plugin to monitor Redis sentinel | |
# | |
# Checks general connectivity to a Redis sentinel server and will go critical | |
# for any of the following conditions: | |
# * Inability to connect to the sentinel server | |
# * Sentinel reports it isn't monitoring any masters | |
# * Sentinel has entered TILT mode | |
# | |
# Arguments: | |
# -H --host HOSTNAME to connect to (defaults to 127.0.0.1) | |
# -p --port PORT to connect to (defaults to 26379) | |
# | |
# Requires the "redis" Rubygem | |
# | |
# Author: Chris Boulton <[email protected]> | |
# License: MIT (http://www.opensource.org/licenses/mit-license.php) | |
# | |
require 'redis' | |
require 'optparse' | |
# Nagios plugin exit codes keyed by state name. The numeric value doubles as
# severity: add_state only replaces $exit_status with a higher-valued state.
STATES = {
  :ok       => 0,
  :warning  => 1,
  :critical => 2,
  :unknown  => 3,
}.freeze

# Connection defaults; -H and -p on the command line override :host and :port.
options = {
  :host    => '127.0.0.1',
  :port    => 26379,
  :timeout => 2,
}

# Messages collected for the single status line printed by do_exit.
$results = []
# Most severe state reported so far.
$exit_status = :ok
# Records +msg+ for the final status line and escalates the overall exit
# status when +status+ is more severe than the one recorded so far.
#
# status      - a key of STATES (:ok, :warning, :critical, :unknown)
# msg         - text (or exception) appended to $results
# should_exit - when true, print the status line and terminate right away
def add_state(status, msg, should_exit = false)
  $results.push(msg)
  $exit_status = status if STATES[status] > STATES[$exit_status]
  do_exit if should_exit
end
# Appends an informational message to the status line without touching the
# exit status.
def add_info(msg)
  $results.push(msg)
end
# Prints the status line ("STATE - msg. msg") and terminates the process
# with the exit code STATES maps to the current $exit_status.
def do_exit
  puts "#{$exit_status.to_s.upcase} - #{$results.join('. ')}"
  exit STATES[$exit_status]
end
# Command line definition: -H/--host and -p/--port override the defaults
# stored in options.
optparse = OptionParser.new do |opts|
  opts.on('-H', '--host HOST', 'Hostname') do |host|
    options[:host] = host
  end
  opts.on('-p', '--port PORT', 'Port') do |port|
    options[:port] = port.to_i
  end
end

# An unrecognised switch or a missing argument is a usage error; report it as
# UNKNOWN instead of letting the exception escape with a stack trace.
begin
  optparse.parse!
rescue OptionParser::ParseError => e
  puts "UNKNOWN - #{e.message}"
  exit STATES[:unknown]
end
# Query the sentinel's INFO output and go critical when it is not a sentinel,
# is in TILT mode, or monitors no masters.
begin
  redis = Redis.new(:host => options[:host], :port => options[:port], :timeout => options[:timeout])
  info = redis.info
  masters = info['sentinel_masters']

  # Each failed check prints the status line and exits immediately.
  add_state(:critical, "Redis instance is not configured as a sentinel", true) unless masters
  add_state(:critical, "Sentinel has entered TILT mode", true) if info['sentinel_tilt'] != '0'

  if masters == '0'
    add_state(:critical, "Sentinel is not monitoring any masters", true)
  else
    add_info("Monitoring #{masters} masters")
  end
rescue Redis::BaseError => e
  # Covers refused connections as well as timeouts and other redis errors, so
  # every failure yields a CRITICAL status line rather than a stack trace.
  add_state(:critical, e.message, true)
end

do_exit
#!/usr/bin/env ruby | |
# | |
# Nagios plugin to monitor the reachability of a given Redis sentinel master | |
# | |
# Checks reachability of a given Redis master configured against a Redis | |
# sentinel server and will go critical for any of the following conditions: | |
# * Inability to connect to the sentinel server | |
# * The given Redis master is not configured on the sentinel server | |
# * No Redis address/port is returned for the given master name | |
# * The Redis instance returned is not reachable | |
# * The Redis instance returned does not have the role "master" | |
# | |
# Arguments: | |
# -m --master MASTER name (required) | |
# -H --host HOSTNAME to connect to (defaults to 127.0.0.1) | |
# -p --port PORT to connect to (defaults to 26379) | |
# | |
# Requires the "redis" Rubygem | |
# | |
# Author: Chris Boulton <[email protected]> | |
# License: MIT (http://www.opensource.org/licenses/mit-license.php) | |
# | |
require 'redis' | |
require 'optparse' | |
# Nagios plugin exit codes keyed by state name. The numeric value doubles as
# severity: add_state only replaces $exit_status with a higher-valued state.
STATES = {
  :ok       => 0,
  :warning  => 1,
  :critical => 2,
  :unknown  => 3,
}.freeze

# Defaults; -H, -p and -m on the command line override :host, :port and
# :master. :master has no default and must be supplied.
options = {
  :host    => '127.0.0.1',
  :port    => 26379,
  :master  => nil,
  :timeout => 2,
}

# Messages collected for the single status line printed by do_exit.
$results = []
# Most severe state reported so far.
$exit_status = :ok
# Records +msg+ for the final status line and escalates the overall exit
# status when +status+ is more severe than the one recorded so far.
#
# status      - a key of STATES (:ok, :warning, :critical, :unknown)
# msg         - text (or exception) appended to $results
# should_exit - when true, print the status line and terminate right away
def add_state(status, msg, should_exit = false)
  $results.push(msg)
  $exit_status = status if STATES[status] > STATES[$exit_status]
  do_exit if should_exit
end
# Appends an informational message to the status line without touching the
# exit status.
def add_info(msg)
  $results.push(msg)
end
# Prints the status line ("STATE - msg. msg") and terminates the process
# with the exit code STATES maps to the current $exit_status.
def do_exit
  puts "#{$exit_status.to_s.upcase} - #{$results.join('. ')}"
  exit STATES[$exit_status]
end
# Command line definition: -H/--host, -p/--port and -m/--master fill the
# corresponding entries of options.
optparse = OptionParser.new do |opts|
  opts.on('-H', '--host HOST', 'Hostname') do |host|
    options[:host] = host
  end
  opts.on('-p', '--port PORT', 'Port') do |port|
    options[:port] = port.to_i
  end
  opts.on('-m', '--master MASTER', 'Perform Sentinel health checks against this master') do |name|
    options[:master] = name
  end
end

# An unrecognised switch or a missing argument is a usage error; report it as
# UNKNOWN instead of letting the exception escape with a stack trace.
begin
  optparse.parse!
rescue OptionParser::ParseError => e
  puts "UNKNOWN - #{e.message}"
  exit STATES[:unknown]
end
# A master name is mandatory. Without one, print the usage text and exit
# UNKNOWN; exit code 1 would be reported by Nagios as WARNING.
unless options[:master]
  puts optparse
  exit STATES[:unknown]
end
# Build the client for the sentinel named on the command line.
begin
  redis = Redis.new(:host => options[:host], :port => options[:port], :timeout => options[:timeout])
rescue Redis::BaseConnectionError => e
  add_state(:critical, e, true)
end

# Ask the sentinel where the named master currently lives. The command takes
# the master name as its only argument; passing an additional options hash
# made the server reject it with "Wrong number of arguments".
begin
  master_addr, master_port = redis.sentinel('get-master-addr-by-name', options[:master])
  unless master_addr && master_port
    add_state(:critical, "No master returned for #{options[:master]}", true)
  end
rescue Redis::BaseError => e
  add_state(:critical, "Could not establish master address: #{e}", true)
end
# Connect to the address the sentinel returned and fetch its INFO output. Any
# failure here means the advertised master cannot be used. The configured
# timeout is applied so a hung master cannot stall the check.
begin
  master = Redis.new(:host => master_addr, :port => master_port, :timeout => options[:timeout])
  node_info = master.info
rescue
  add_state(:critical, "Master #{options[:master]} unreachable at #{master_addr}:#{master_port}", true)
end

add_info("Master is #{master_addr}:#{master_port}")

# The instance answered, but it must also report itself as a master.
unless node_info['role'] == 'master'
  add_state(:critical, "Role is #{node_info['role']} but should be master")
end

do_exit
#!/usr/bin/env ruby | |
# | |
# Nagios plugin to monitor the health of a given Redis sentinel master | |
# | |
# Checks health of a HA Redis environment managed by Redis sentinel. | |
# Will go critical whenever the following conditions are met: | |
# * Sentinel server is not reachable | |
# * Sentinel server does not know of the given master | |
# * Sentinel server reports the master is objectively down | |
# * Sentinel server reports the master is subjectively down | |
# * Minimum number of known slaves is below critical threshold | |
# * Minimum number of healthy slaves is below critical threshold | |
# * Minimum number of known sentinels is below critical threshold | |
# * Minimum number of healthy sentinels is below critical threshold | |
# * A quorum setting has not been configured for this master or is 0 | |
# * The number of known sentinels does not meet the minimum for quorum | |
# * The number of healthy sentinels does not meet the minimum for quorum | |
# * A quorum setting has not been configured for this master or is 0 | |
# | |
# Will reach a warning state whenever the following conditions are met: | |
# * Minimum number of known slaves is below warning threshold | |
# * Minimum number of healthy slaves is below warning threshold | |
# * Minimum number of known sentinels is below warning threshold | |
# * Minimum number of healthy sentinels is below warning threshold | |
# | |
# Healthy slaves are determined in the same manner sentinel tests them: | |
# * Slave must not be O_DOWN/S_DOWN | |
# * Slave must not be disconnected | |
# * Slave priority must be > 0 | |
# * Slave must have sent an OK ping reply < 5s ago | |
# | |
# Healthy sentinels are determined as follows: | |
# * Sentinel must not be O_DOWN/S_DOWN | |
# * Sentinel must not be disconnected | |
# | |
# Warning and critical thresholds are optional and will not be checked if | |
# not supplied. | |
# | |
# Arguments: | |
# -m --master MASTER name (required) | |
# -w --warning WARNING_SLAVES,WARNING_SENTINELS | |
# -c --critical CRITICAL_SLAVES,CRITICAL_SENTINELS | |
# -H --host HOSTNAME to connect to (defaults to 127.0.0.1) | |
# -p --port PORT to connect to (defaults to 26379) | |
# | |
# Requires the "redis" Rubygem | |
# | |
# Author: Chris Boulton <[email protected]> | |
# License: MIT (http://www.opensource.org/licenses/mit-license.php) | |
# | |
require 'redis' | |
require 'optparse' | |
# Nagios plugin exit codes keyed by state name. The numeric value doubles as
# severity: add_state only replaces $exit_status with a higher-valued state.
STATES = {
  :ok       => 0,
  :warning  => 1,
  :critical => 2,
  :unknown  => 3,
}.freeze

# Defaults; -H, -p and -m on the command line override :host, :port and
# :master. The -w/-c switches add the :min_*_warn / :min_*_crit entries.
options = {
  :host    => '127.0.0.1',
  :port    => 26379,
  :master  => nil,
  :timeout => 2,
}

# Messages collected for the single status line printed by do_exit.
$results = []
# Most severe state reported so far.
$exit_status = :ok
# Records +msg+ for the final status line and escalates the overall exit
# status when +status+ is more severe than the one recorded so far.
#
# status      - a key of STATES (:ok, :warning, :critical, :unknown)
# msg         - text (or exception) appended to $results
# should_exit - when true, print the status line and terminate right away
def add_state(status, msg, should_exit = false)
  $results.push(msg)
  $exit_status = status if STATES[status] > STATES[$exit_status]
  do_exit if should_exit
end
# Appends an informational message to the status line without touching the
# exit status.
def add_info(msg)
  $results.push(msg)
end
# Prints the status line ("STATE - msg. msg") and terminates the process
# with the exit code STATES maps to the current $exit_status.
def do_exit
  puts "#{$exit_status.to_s.upcase} - #{$results.join('. ')}"
  exit STATES[$exit_status]
end
# Turns a "SLAVES,SENTINELS" argument into [slaves, sentinels]. An empty
# field (as in ",3") stays nil so that threshold is not checked; a missing
# second field leaves the sentinel threshold nil as well.
parse_thresholds = lambda do |spec|
  spec.split(',').map { |field| field == '' ? nil : field.to_i }
end

# Command line definition. -w and -c each carry two minimums: the first for
# slaves, the second for sentinels.
optparse = OptionParser.new do |opts|
  opts.on('-H', '--host HOST', 'Hostname') do |host|
    options[:host] = host
  end
  opts.on('-p', '--port PORT', 'Port') do |port|
    options[:port] = port.to_i
  end
  opts.on('-m', '--master MASTER', 'Perform Sentinel health checks against this master') do |name|
    options[:master] = name
  end
  opts.on('-w', '--warning THRESHOLDS', 'Comma separated string of warning thresholds for min slaves and min sentinels') do |spec|
    options[:min_slaves_warn], options[:min_sentinels_warn] = parse_thresholds.call(spec)
  end
  opts.on('-c', '--critical THRESHOLDS', 'Comma separated string of critical thresholds for min slaves and min sentinels') do |spec|
    options[:min_slaves_crit], options[:min_sentinels_crit] = parse_thresholds.call(spec)
  end
end

# An unrecognised switch or a missing argument is a usage error; report it as
# UNKNOWN instead of letting the exception escape with a stack trace.
begin
  optparse.parse!
rescue OptionParser::ParseError => e
  puts "UNKNOWN - #{e.message}"
  exit STATES[:unknown]
end
# A master name is mandatory. Without one, print the usage text and exit
# UNKNOWN; exit code 1 would be reported by Nagios as WARNING.
unless options[:master]
  puts optparse
  exit STATES[:unknown]
end
# Coerces one sentinel reply entry into a Hash. Accepts either a flat
# [key, value, key, value, ...] array or an already-built Hash, because the
# shape of the reply differs between redis-rb versions.
def sentinel_entry(reply)
  reply.is_a?(Hash) ? reply : Hash[*reply]
end

# True when the entry's comma separated flags contain none of o_down,
# s_down or disconnected.
def sentinel_flags_healthy?(entry)
  (entry['flags'].split(',') & %w[o_down s_down disconnected]).empty?
end

# Returns [state, limit] for the most severe threshold that +count+ fails to
# exceed, or nil when no supplied threshold is breached. nil thresholds are
# skipped.
def breached_threshold(count, crit, warn)
  return [:critical, crit] if crit && count <= crit
  return [:warning, warn] if warn && count <= warn
  nil
end

# Applies the critical/warning minimums to the known and the healthy count of
# +kind+ ("slaves" or "sentinels"). +summary+ is reported as plain info when
# the known count passes.
def check_minimums(kind, known, healthy, crit, warn, summary)
  state, limit = breached_threshold(known, crit, warn)
  if state
    add_state(state, "#{known} known #{kind}, expected at least #{limit + 1}")
  else
    add_info(summary)
  end

  state, limit = breached_threshold(healthy, crit, warn)
  add_state(state, "Expected at least #{limit + 1} healthy #{kind}") if state
end

begin
  redis = Redis.new(:host => options[:host], :port => options[:port], :timeout => options[:timeout])

  master_info = sentinel_entry(redis.sentinel('master', options[:master]))
  sentinels   = redis.sentinel('sentinels', options[:master])
  slaves      = redis.sentinel('slaves', options[:master])

  quorum     = master_info['quorum'].to_i
  num_slaves = master_info['num-slaves'].to_i
  # The reply counts only the *other* sentinels; add the one being queried.
  num_sentinels = master_info['num-other-sentinels'].to_i + 1

  master_flags = master_info['flags'].split(',')
  if master_flags.include?('o_down')
    add_state(:critical, 'OBJECTIVELY DOWN')
  elsif master_flags.include?('s_down')
    add_state(:critical, 'SUBJECTIVELY DOWN')
  end

  # A slave is healthy when its flags are clean, it has a positive priority
  # and its last OK ping reply is less than 5000 ms old.
  num_healthy_slaves = slaves.count do |raw|
    slave = sentinel_entry(raw)
    sentinel_flags_healthy?(slave) &&
      slave['slave-priority'].to_i > 0 &&
      slave['last-ok-ping-reply'].to_i < 5000
  end

  num_healthy_sentinels = sentinels.count do |raw|
    sentinel_flags_healthy?(sentinel_entry(raw))
  end
  # assume the local sentinel is healthy
  num_healthy_sentinels += 1

  check_minimums('slaves', num_slaves, num_healthy_slaves,
                 options[:min_slaves_crit], options[:min_slaves_warn],
                 "#{num_healthy_slaves}/#{num_slaves} slaves healthy")
  check_minimums('sentinels', num_sentinels, num_healthy_sentinels,
                 options[:min_sentinels_crit], options[:min_sentinels_warn],
                 "#{num_healthy_sentinels}/#{num_sentinels} sentinels healthy")

  if quorum == 0
    add_state(:critical, 'No quorum set')
  elsif num_sentinels < quorum
    add_state(:critical, 'Quorum not met')
  elsif num_healthy_sentinels < quorum
    add_state(:critical, "Not enough healthy sentinels for quorum (#{quorum})")
  end
rescue Redis::BaseError => e
  # Covers command errors (e.g. unknown master) as well as connection
  # failures and timeouts, so an unreachable sentinel is reported as CRITICAL
  # instead of crashing with a stack trace.
  add_state(:critical, e.message)
end

do_exit
Or you can use the dockerized version of the script:
docker run --rm mingalevme/redis-sentinel-health -m myCluster -w 2,1 -c 2,1
Hi,
I have an issue with
+++
./check_sentinel_master -H localhost -p 7503 -m master123
CRITICAL - Could not establish master address: ERR Wrong number of arguments for 'sentinel get-master-addr-by-name'
++++
never mind. figured it out.
FROM:
master_addr, master_port = redis.sentinel('get-master-addr-by-name', options[:master], :timeout => options[:timeout])
TO:
master_addr, master_port = redis.sentinel('get-master-addr-by-name', options[:master])
solved the issue.
Is anybody else facing issues while integrating the Ruby scripts with Nagios?
I get the error below when the Nagios daemon talks to NRPE on the client node. Otherwise, the Ruby script works fine on the client node.
++
NRPE: Unable to read output
++
@Sn4kY, replace
master_info = Hash[*redis.sentinel('master', options[:master])]
with
master_info = redis.sentinel('master', options[:master])
and remove
slave_info = Hash[*slave_info]
and
sentinel_info = Hash[*sentinel_info]