Created
April 27, 2014 03:34
-
-
Save chrisboulton/11337032 to your computer and use it in GitHub Desktop.
Awful Nagios plugins to monitor a Redis Sentinel environment
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
# | |
# Nagios plugin to monitor Redis sentinel | |
# | |
# Checks general connectivity to a Redis sentinel server and will go critical | |
# for any of the following conditions: | |
# * Inability to connect to the sentinel server | |
# * Sentinel reports it isn't monitoring any masters | |
# * Sentinel has entered TILT mode | |
# | |
# Arguments: | |
# -H --host HOSTNAME to connect to (defaults to 127.0.0.1) | |
# -p --port PORT to connect to (defaults to 26379) | |
# | |
# Requires the "redis" Rubygem | |
# | |
# Author: Chris Boulton <[email protected]> | |
# License: MIT (http://www.opensource.org/licenses/mit-license.php) | |
# | |
require 'redis' | |
require 'optparse' | |
# Nagios exit codes keyed by state name. A higher number is a more severe
# state; add_state relies on that ordering when deciding whether to escalate.
STATES = {
  :ok       => 0,
  :warning  => 1,
  :critical => 2,
  :unknown  => 3,
}.freeze

# Defaults for the sentinel connection; --host/--port override them.
options = {
  :host    => '127.0.0.1',
  :port    => 26379,
  :timeout => 2,
}

# Messages collected for the single line of plugin output.
$results = []
# Most severe state recorded so far.
$exit_status = :ok
# Records a check result and escalates the overall exit status.
#
# status      - Symbol key of STATES (:ok, :warning, :critical, :unknown).
# msg         - Message appended to the plugin output.
# should_exit - When true, print the output and terminate immediately.
#
# The overall status only ever moves towards a higher STATES value, so a
# later, less severe result never hides an earlier problem.
def add_state(status, msg, should_exit = false)
  $results.push(msg)
  $exit_status = status if STATES[status] > STATES[$exit_status]
  do_exit if should_exit
end
# Appends an informational message to the plugin output without touching
# the exit status.
def add_info(msg)
  $results.push(msg)
end
# Prints the Nagios status line ("STATE - msg. msg") built from the collected
# messages and terminates the process with the matching STATES exit code.
def do_exit
  puts "#{$exit_status.upcase} - #{$results.join('. ')}"
  exit STATES[$exit_status]
end
# Command line handling: -H/--host and -p/--port override the defaults.
optparse = OptionParser.new do |opts|
  opts.on('-H', '--host HOST', 'Hostname') do |h|
    options[:host] = h
  end

  # DecimalInteger rejects non-numeric input instead of silently turning it
  # into port 0 the way String#to_i would.
  opts.on('-p', '--port PORT', OptionParser::DecimalInteger, 'Port') do |p|
    options[:port] = p
  end
end

begin
  optparse.parse!
rescue OptionParser::ParseError => e
  # A usage error is neither OK nor a service problem: report UNKNOWN
  # rather than letting Ruby die with a traceback and exit code 1 (WARNING).
  puts "UNKNOWN - #{e.message}"
  exit STATES[:unknown]
end
# Query the sentinel's INFO output and translate it into a Nagios state.
begin
  redis = Redis.new(:host => options[:host], :port => options[:port], :timeout => options[:timeout])
  info = redis.info

  # A missing sentinel_masters field is treated as "this is not a sentinel".
  add_state(:critical, "Redis instance is not configured as a sentinel", true) unless info['sentinel_masters']
  add_state(:critical, "Sentinel has entered TILT mode", true) if info['sentinel_tilt'] != '0'

  if info['sentinel_masters'] == '0'
    add_state(:critical, "Sentinel is not monitoring any masters", true)
  else
    add_info("Monitoring #{info['sentinel_masters']} masters")
  end
rescue Redis::BaseError => e
  # BaseError also covers timeouts and command errors, not only refused
  # connections, so every Redis failure is reported as CRITICAL instead of
  # crashing with a traceback.
  add_state(:critical, e.message, true)
end

do_exit
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
# | |
# Nagios plugin to monitor the reachability of a given Redis sentinel master | |
# | |
# Checks reachability of a given Redis master configured against a Redis | |
# sentinel server and will go critical for any of the following conditions: | |
# * Inability to connect to the sentinel server | |
# * The given Redis master is not configured on the sentinel server | |
# * No Redis address/port is returned for the given master name | |
# * The Redis instance returned is not reachable | |
# * The Redis instance returned does not have the role "master" | |
# | |
# Arguments: | |
# -m --master MASTER name (required) | |
# -H --host HOSTNAME to connect to (defaults to 127.0.0.1) | |
# -p --port PORT to connect to (defaults to 26379) | |
# | |
# Requires the "redis" Rubygem | |
# | |
# Author: Chris Boulton <[email protected]> | |
# License: MIT (http://www.opensource.org/licenses/mit-license.php) | |
# | |
require 'redis' | |
require 'optparse' | |
# Nagios exit codes keyed by state name. A higher number is a more severe
# state; add_state relies on that ordering when deciding whether to escalate.
STATES = {
  :ok       => 0,
  :warning  => 1,
  :critical => 2,
  :unknown  => 3,
}.freeze

# Defaults for the sentinel connection; the command line overrides them.
# :master has no default and must be supplied with --master.
options = {
  :host    => '127.0.0.1',
  :port    => 26379,
  :master  => nil,
  :timeout => 2,
}

# Messages collected for the single line of plugin output.
$results = []
# Most severe state recorded so far.
$exit_status = :ok
# Records a check result and escalates the overall exit status.
#
# status      - Symbol key of STATES (:ok, :warning, :critical, :unknown).
# msg         - Message appended to the plugin output.
# should_exit - When true, print the output and terminate immediately.
#
# The overall status only ever moves towards a higher STATES value, so a
# later, less severe result never hides an earlier problem.
def add_state(status, msg, should_exit = false)
  $results.push(msg)
  $exit_status = status if STATES[status] > STATES[$exit_status]
  do_exit if should_exit
end
# Appends an informational message to the plugin output without touching
# the exit status.
def add_info(msg)
  $results.push(msg)
end
# Prints the Nagios status line ("STATE - msg. msg") built from the collected
# messages and terminates the process with the matching STATES exit code.
def do_exit
  puts "#{$exit_status.upcase} - #{$results.join('. ')}"
  exit STATES[$exit_status]
end
# Command line handling: -H/--host and -p/--port override the defaults,
# -m/--master is required.
optparse = OptionParser.new do |opts|
  opts.on('-H', '--host HOST', 'Hostname') do |h|
    options[:host] = h
  end

  # DecimalInteger rejects non-numeric input instead of silently turning it
  # into port 0 the way String#to_i would.
  opts.on('-p', '--port PORT', OptionParser::DecimalInteger, 'Port') do |p|
    options[:port] = p
  end

  opts.on('-m', '--master MASTER', 'Perform Sentinel health checks against this master') do |m|
    options[:master] = m
  end
end

begin
  optparse.parse!
rescue OptionParser::ParseError => e
  # A usage error is neither OK nor a service problem: report UNKNOWN
  # rather than letting Ruby die with a traceback and exit code 1 (WARNING).
  puts "UNKNOWN - #{e.message}"
  exit STATES[:unknown]
end

# Without a master name there is nothing to check. Exit UNKNOWN, because
# exit code 1 would be read by Nagios as WARNING.
unless options[:master]
  puts optparse
  exit STATES[:unknown]
end
# Ask the sentinel for the current master of options[:master], then connect
# to that master and confirm it is reachable and reports the "master" role.
begin
  redis = Redis.new(:host => options[:host], :port => options[:port], :timeout => options[:timeout])
rescue Redis::BaseConnectionError => e
  add_state(:critical, e.message, true)
end

begin
  # Only the master name may follow the subcommand: any further argument is
  # sent to the server and rejected with "Wrong number of arguments".
  master_addr, master_port = redis.sentinel('get-master-addr-by-name', options[:master])
  unless master_addr && master_port
    add_state(:critical, "No master returned for #{options[:master]}", true)
  end
rescue Redis::BaseError => e
  add_state(:critical, "Could not establish master address: #{e}", true)
end

begin
  # Use the same timeout as for the sentinel so an unresponsive master
  # cannot stall the check.
  master = Redis.new(:host => master_addr, :port => master_port, :timeout => options[:timeout])
  node_info = master.info
rescue StandardError
  add_state(:critical, "Master #{options[:master]} unreachable at #{master_addr}:#{master_port}", true)
end

add_info("Master is #{master_addr}:#{master_port}")

# The instance sentinel points at must itself report the master role.
unless node_info['role'] == 'master'
  add_state(:critical, "Role is #{node_info['role']} but should be master")
end

do_exit
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
# | |
# Nagios plugin to monitor the health of a given Redis sentinel master | |
# | |
# Checks health of a HA Redis environment managed by Redis sentinel. | |
# Will go critical whenever the following conditions are met: | |
# * Sentinel server is not reachable | |
# * Sentinel server does not know of the given master | |
# * Sentinel server reports the master is objectively down | |
# * Sentinel server reports the master is subjectively down | |
# * Minimum number of known slaves is below critical threshold | |
# * Minimum number of healthy slaves is below critical threshold | |
# * Minimum number of known sentinels is below critical threshold | |
# * Minimum number of healthy sentinels is below critical threshold | |
# * A quorum setting has not been configured for this master or is 0 | |
# * The number of known sentinels does not meet the minimum for quorum | |
# * The number of healthy sentinels does not meet the minimum for quorum | |
# * A quorum setting has not been configured for this master or is 0 | |
# | |
# Will reach a warning state whenever the following conditions are met: | |
# * Minimum number of known slaves is below warning threshold | |
# * Minimum number of healthy slaves is below warning threshold | |
# * Minimum number of known sentinels is below warning threshold | |
# * Minimum number of healthy sentinels is below warning threshold | |
# | |
# Healthy slaves are determined in the same manner sentinel tests them: | |
# * Slave must not be O_DOWN/S_DOWN | |
# * Slave must not be disconnected | |
# * Slave priority must be > 0 | |
# * Slave must have sent an OK ping reply < 5s ago | |
# | |
# Healthy sentinels are determined as follows: | |
# * Sentinel must not be O_DOWN/S_DOWN | |
# * Sentinel must not be disconnected | |
# | |
# Warning and critical thresholds are optional and will not be checked if | |
# not supplied. | |
# | |
# Arguments: | |
# -m --master MASTER name (required) | |
# -w --warning WARNING_SLAVES,WARNING_SENTINELS | |
# -c --critical CRITICAL_SLAVES,CRITICAL_SENTINELS | |
# -H --host HOSTNAME to connect to (defaults to 127.0.0.1) | |
# -p --port PORT to connect to (defaults to 26379) | |
# | |
# Requires the "redis" Rubygem | |
# | |
# Author: Chris Boulton <[email protected]> | |
# License: MIT (http://www.opensource.org/licenses/mit-license.php) | |
# | |
require 'redis' | |
require 'optparse' | |
# Nagios exit codes keyed by state name. A higher number is a more severe
# state; add_state relies on that ordering when deciding whether to escalate.
STATES = {
  :ok       => 0,
  :warning  => 1,
  :critical => 2,
  :unknown  => 3,
}.freeze

# Defaults for the sentinel connection; the command line overrides them.
# :master has no default and must be supplied with --master. The optional
# :min_*_warn / :min_*_crit thresholds are only set when -w / -c are given.
options = {
  :host    => '127.0.0.1',
  :port    => 26379,
  :master  => nil,
  :timeout => 2,
}

# Messages collected for the single line of plugin output.
$results = []
# Most severe state recorded so far.
$exit_status = :ok
# Records a check result and escalates the overall exit status.
#
# status      - Symbol key of STATES (:ok, :warning, :critical, :unknown).
# msg         - Message appended to the plugin output.
# should_exit - When true, print the output and terminate immediately.
#
# The overall status only ever moves towards a higher STATES value, so a
# later, less severe result never hides an earlier problem.
def add_state(status, msg, should_exit = false)
  $results.push(msg)
  $exit_status = status if STATES[status] > STATES[$exit_status]
  do_exit if should_exit
end
# Appends an informational message to the plugin output without touching
# the exit status.
def add_info(msg)
  $results.push(msg)
end
# Prints the Nagios status line ("STATE - msg. msg") built from the collected
# messages and terminates the process with the matching STATES exit code.
def do_exit
  puts "#{$exit_status.upcase} - #{$results.join('. ')}"
  exit STATES[$exit_status]
end
# Command line handling: -H/--host and -p/--port override the defaults,
# -m/--master is required, -w/-c take "SLAVES,SENTINELS" threshold pairs.
optparse = OptionParser.new do |opts|
  # Splits "SLAVES,SENTINELS" into two Integers. An empty field becomes nil,
  # which leaves that threshold unchecked.
  parse_thresholds = lambda do |text|
    text.split(',').map { |field| field == '' ? nil : field.to_i }
  end

  opts.on('-H', '--host HOST', 'Hostname') do |h|
    options[:host] = h
  end

  # DecimalInteger rejects non-numeric input instead of silently turning it
  # into port 0 the way String#to_i would.
  opts.on('-p', '--port PORT', OptionParser::DecimalInteger, 'Port') do |p|
    options[:port] = p
  end

  opts.on('-m', '--master MASTER', 'Perform Sentinel health checks against this master') do |m|
    options[:master] = m
  end

  opts.on('-w', '--warning THRESHOLDS', 'Comma separated string of warning thresholds for min slaves and min sentinels') do |t|
    options[:min_slaves_warn], options[:min_sentinels_warn] = parse_thresholds.call(t)
  end

  opts.on('-c', '--critical THRESHOLDS', 'Comma separated string of critical thresholds for min slaves and min sentinels') do |t|
    options[:min_slaves_crit], options[:min_sentinels_crit] = parse_thresholds.call(t)
  end
end

begin
  optparse.parse!
rescue OptionParser::ParseError => e
  # A usage error is neither OK nor a service problem: report UNKNOWN
  # rather than letting Ruby die with a traceback and exit code 1 (WARNING).
  puts "UNKNOWN - #{e.message}"
  exit STATES[:unknown]
end

# Without a master name there is nothing to check. Exit UNKNOWN, because
# exit code 1 would be read by Nagios as WARNING.
unless options[:master]
  puts optparse
  exit STATES[:unknown]
end
# Normalises one SENTINEL reply record into a Hash. Depending on the redis-rb
# version a record arrives either as a flat [key, value, key, value, ...]
# Array or already converted to a Hash; both shapes are accepted.
def sentinel_record(reply)
  reply.is_a?(Hash) ? reply : Hash[*reply]
end

# True when the record's flags contain none of o_down, s_down, disconnected.
def node_up?(record)
  (record['flags'].to_s.split(',') & %w[o_down s_down disconnected]).empty?
end

begin
  redis = Redis.new(:host => options[:host], :port => options[:port], :timeout => options[:timeout])
rescue Redis::BaseConnectionError => e
  add_state(:critical, e.message, true)
end

begin
  master_info = sentinel_record(redis.sentinel('master', options[:master]))
  sentinels = redis.sentinel('sentinels', options[:master]).map { |record| sentinel_record(record) }
  slaves = redis.sentinel('slaves', options[:master]).map { |record| sentinel_record(record) }

  quorum = master_info['quorum'].to_i
  num_slaves = master_info['num-slaves'].to_i
  # The sentinel being queried is not part of "other sentinels"; count it too.
  num_sentinels = master_info['num-other-sentinels'].to_i + 1

  flags = master_info['flags'].to_s.split(',')
  if flags.include?('o_down')
    add_state(:critical, 'OBJECTIVELY DOWN')
  elsif flags.include?('s_down')
    add_state(:critical, 'SUBJECTIVELY DOWN')
  end

  # A healthy slave is up, has a priority above 0 and answered a ping with
  # OK less than 5000 ms ago.
  num_healthy_slaves = slaves.count do |slave|
    node_up?(slave) &&
      slave['slave-priority'].to_i > 0 &&
      slave['last-ok-ping-reply'].to_i < 5000
  end

  # Assume the local sentinel (the one answering these queries) is healthy.
  num_healthy_sentinels = sentinels.count { |sentinel| node_up?(sentinel) } + 1

  slave_status = "#{num_healthy_slaves}/#{num_slaves} slaves healthy"
  sentinel_status = "#{num_healthy_sentinels}/#{num_sentinels} sentinels healthy"

  # Thresholds are optional (nil when not supplied). A count equal to or
  # below the threshold triggers the state, hence "at least threshold + 1".
  if options[:min_slaves_crit] && num_slaves <= options[:min_slaves_crit]
    add_state(:critical, "#{num_slaves} known slaves, expected at least #{options[:min_slaves_crit] + 1}")
  elsif options[:min_slaves_warn] && num_slaves <= options[:min_slaves_warn]
    add_state(:warning, "#{num_slaves} known slaves, expected at least #{options[:min_slaves_warn] + 1}")
  else
    add_info(slave_status)
  end

  if options[:min_slaves_crit] && num_healthy_slaves <= options[:min_slaves_crit]
    add_state(:critical, "Expected at least #{options[:min_slaves_crit] + 1} healthy slaves")
  elsif options[:min_slaves_warn] && num_healthy_slaves <= options[:min_slaves_warn]
    add_state(:warning, "Expected at least #{options[:min_slaves_warn] + 1} healthy slaves")
  end

  # The thresholds live in the options hash; there are no bare locals named
  # min_sentinels_crit / min_sentinels_warn.
  if options[:min_sentinels_crit] && num_sentinels <= options[:min_sentinels_crit]
    add_state(:critical, "#{num_sentinels} known sentinels, expected at least #{options[:min_sentinels_crit] + 1}")
  elsif options[:min_sentinels_warn] && num_sentinels <= options[:min_sentinels_warn]
    add_state(:warning, "#{num_sentinels} known sentinels, expected at least #{options[:min_sentinels_warn] + 1}")
  else
    add_info(sentinel_status)
  end

  if options[:min_sentinels_crit] && num_healthy_sentinels <= options[:min_sentinels_crit]
    add_state(:critical, "Expected at least #{options[:min_sentinels_crit] + 1} healthy sentinels")
  elsif options[:min_sentinels_warn] && num_healthy_sentinels <= options[:min_sentinels_warn]
    add_state(:warning, "Expected at least #{options[:min_sentinels_warn] + 1} healthy sentinels")
  end

  if quorum == 0
    add_state(:critical, 'No quorum set')
  elsif num_sentinels < quorum
    add_state(:critical, 'Quorum not met')
  elsif num_healthy_sentinels < quorum
    add_state(:critical, "Not enough healthy sentinels for quorum (#{quorum})")
  end
rescue Redis::BaseError => e
  # BaseError covers command errors (e.g. an unknown master name) as well as
  # connection failures and timeouts, so none of them escapes as a traceback.
  add_state(:critical, e.message)
end

do_exit
Hi,
I have an issue with
+++
./check_sentinel_master -H localhost -p 7503 -m master123
CRITICAL - Could not establish master address: ERR Wrong number of arguments for 'sentinel get-master-addr-by-name'
++++
never mind. figured it out.
FROM:
master_addr, master_port = redis.sentinel('get-master-addr-by-name', options[:master], :timeout => options[:timeout])
TO:
master_addr, master_port = redis.sentinel('get-master-addr-by-name', options[:master])
solved the issue.
Is anybody else facing issues while integrating the Ruby scripts with Nagios?
I get the error below when the Nagios daemon talks to NRPE on the client node; otherwise, the Ruby script works fine on the client node.
++
NRPE: Unable to read output
++
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Or you can use the dockerized version of the script: