Skip to content

Instantly share code, notes, and snippets.

@bhcopeland
Last active October 27, 2017 15:32
Show Gist options
  • Save bhcopeland/b54d3c678a0cb6e87119 to your computer and use it in GitHub Desktop.
Save bhcopeland/b54d3c678a0cb6e87119 to your computer and use it in GitHub Desktop.
nvidia-smi, temperature, fan speed and memory details. Zabbix UserParameter and Template
<?xml version="1.0" encoding="UTF-8"?>
<zabbix_export>
<version>2.0</version>
<date>2014-07-13T23:42:46Z</date>
<groups>
<group>
<name>Templates</name>
</group>
</groups>
<templates>
<template>
<template>nvidia_sensors</template>
<name>nvidia_sensors</name>
<groups>
<group>
<name>Templates</name>
</group>
</groups>
<applications>
<application>
<name>Nvidia</name>
</application>
</applications>
<items>
<item>
<name>GPU0 Fan Speed</name>
<type>0</type>
<snmp_community/>
<multiplier>0</multiplier>
<snmp_oid/>
<key>gpu.fanspeed[0]</key>
<delay>30</delay>
<history>90</history>
<trends>365</trends>
<status>0</status>
<value_type>0</value_type>
<allowed_hosts/>
<units/>
<delta>0</delta>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<formula>1</formula>
<delay_flex/>
<params/>
<ipmi_sensor/>
<data_type>0</data_type>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<description/>
<inventory_link>0</inventory_link>
<applications>
<application>
<name>Nvidia</name>
</application>
</applications>
<valuemap/>
</item>
<item>
<name>GPU0 Free Memory</name>
<type>0</type>
<snmp_community/>
<multiplier>0</multiplier>
<snmp_oid/>
<key>gpu.free[0]</key>
<delay>30</delay>
<history>90</history>
<trends>365</trends>
<status>0</status>
<value_type>0</value_type>
<allowed_hosts/>
<units/>
<delta>0</delta>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<formula>1</formula>
<delay_flex/>
<params/>
<ipmi_sensor/>
<data_type>0</data_type>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<description/>
<inventory_link>0</inventory_link>
<applications>
<application>
<name>Nvidia</name>
</application>
</applications>
<valuemap/>
</item>
<item>
<name>GPU0 Temperature</name>
<type>0</type>
<snmp_community/>
<multiplier>0</multiplier>
<snmp_oid/>
<key>gpu.temp[0]</key>
<delay>30</delay>
<history>90</history>
<trends>365</trends>
<status>0</status>
<value_type>0</value_type>
<allowed_hosts/>
<units/>
<delta>0</delta>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<formula>1</formula>
<delay_flex/>
<params/>
<ipmi_sensor/>
<data_type>0</data_type>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<description/>
<inventory_link>0</inventory_link>
<applications>
<application>
<name>Nvidia</name>
</application>
</applications>
<valuemap/>
</item>
<item>
<name>GPU0 Total Memory</name>
<type>0</type>
<snmp_community/>
<multiplier>0</multiplier>
<snmp_oid/>
<key>gpu.memtotal[0]</key>
<delay>30</delay>
<history>90</history>
<trends>365</trends>
<status>0</status>
<value_type>0</value_type>
<allowed_hosts/>
<units/>
<delta>0</delta>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<formula>1</formula>
<delay_flex/>
<params/>
<ipmi_sensor/>
<data_type>0</data_type>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<description/>
<inventory_link>0</inventory_link>
<applications>
<application>
<name>Nvidia</name>
</application>
</applications>
<valuemap/>
</item>
<item>
<name>GPU0 Used Memory</name>
<type>0</type>
<snmp_community/>
<multiplier>0</multiplier>
<snmp_oid/>
<key>gpu.used[0]</key>
<delay>30</delay>
<history>90</history>
<trends>365</trends>
<status>0</status>
<value_type>0</value_type>
<allowed_hosts/>
<units/>
<delta>0</delta>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<formula>1</formula>
<delay_flex/>
<params/>
<ipmi_sensor/>
<data_type>0</data_type>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<description/>
<inventory_link>0</inventory_link>
<applications>
<application>
<name>Nvidia</name>
</application>
</applications>
<valuemap/>
</item>
<item>
<name>GPU1 Fan Speed</name>
<type>0</type>
<snmp_community/>
<multiplier>0</multiplier>
<snmp_oid/>
<key>gpu.fanspeed[1]</key>
<delay>30</delay>
<history>90</history>
<trends>365</trends>
<status>0</status>
<value_type>0</value_type>
<allowed_hosts/>
<units/>
<delta>0</delta>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<formula>1</formula>
<delay_flex/>
<params/>
<ipmi_sensor/>
<data_type>0</data_type>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<description/>
<inventory_link>0</inventory_link>
<applications>
<application>
<name>Nvidia</name>
</application>
</applications>
<valuemap/>
</item>
<item>
<name>GPU1 Free Memory</name>
<type>0</type>
<snmp_community/>
<multiplier>0</multiplier>
<snmp_oid/>
<key>gpu.free[1]</key>
<delay>30</delay>
<history>90</history>
<trends>365</trends>
<status>0</status>
<value_type>0</value_type>
<allowed_hosts/>
<units/>
<delta>0</delta>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<formula>1</formula>
<delay_flex/>
<params/>
<ipmi_sensor/>
<data_type>0</data_type>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<description/>
<inventory_link>0</inventory_link>
<applications>
<application>
<name>Nvidia</name>
</application>
</applications>
<valuemap/>
</item>
<item>
<name>GPU1 Temperature</name>
<type>0</type>
<snmp_community/>
<multiplier>0</multiplier>
<snmp_oid/>
<key>gpu.temp[1]</key>
<delay>30</delay>
<history>90</history>
<trends>365</trends>
<status>0</status>
<value_type>0</value_type>
<allowed_hosts/>
<units/>
<delta>0</delta>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<formula>1</formula>
<delay_flex/>
<params/>
<ipmi_sensor/>
<data_type>0</data_type>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<description/>
<inventory_link>0</inventory_link>
<applications>
<application>
<name>Nvidia</name>
</application>
</applications>
<valuemap/>
</item>
<item>
<name>GPU1 Total Memory</name>
<type>0</type>
<snmp_community/>
<multiplier>0</multiplier>
<snmp_oid/>
<key>gpu.memtotal[1]</key>
<delay>30</delay>
<history>90</history>
<trends>365</trends>
<status>0</status>
<value_type>0</value_type>
<allowed_hosts/>
<units/>
<delta>0</delta>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<formula>1</formula>
<delay_flex/>
<params/>
<ipmi_sensor/>
<data_type>0</data_type>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<description/>
<inventory_link>0</inventory_link>
<applications>
<application>
<name>Nvidia</name>
</application>
</applications>
<valuemap/>
</item>
<item>
<name>GPU1 Used Memory</name>
<type>0</type>
<snmp_community/>
<multiplier>0</multiplier>
<snmp_oid/>
<key>gpu.used[1]</key>
<delay>30</delay>
<history>90</history>
<trends>365</trends>
<status>0</status>
<value_type>0</value_type>
<allowed_hosts/>
<units/>
<delta>0</delta>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<formula>1</formula>
<delay_flex/>
<params/>
<ipmi_sensor/>
<data_type>0</data_type>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<description/>
<inventory_link>0</inventory_link>
<applications>
<application>
<name>Nvidia</name>
</application>
</applications>
<valuemap/>
</item>
</items>
<discovery_rules/>
<macros/>
<templates/>
<screens/>
</template>
</templates>
<triggers>
<trigger>
<expression>{nvidia_sensors:gpu.temp[0].prev(0)}&gt;95</expression>
<name>GPU Temperature over 95c {HOSTNAME}</name>
<url/>
<status>0</status>
<priority>3</priority>
<description/>
<type>0</type>
<dependencies/>
</trigger>
</triggers>
<graphs>
<graph>
<name>GPU Fan Speed</name>
<width>900</width>
<height>200</height>
<yaxismin>0.0000</yaxismin>
<yaxismax>100.0000</yaxismax>
<show_work_period>1</show_work_period>
<show_triggers>1</show_triggers>
<type>0</type>
<show_legend>1</show_legend>
<show_3d>0</show_3d>
<percent_left>0.0000</percent_left>
<percent_right>0.0000</percent_right>
<ymin_type_1>0</ymin_type_1>
<ymax_type_1>0</ymax_type_1>
<ymin_item_1>0</ymin_item_1>
<ymax_item_1>0</ymax_item_1>
<graph_items>
<graph_item>
<sortorder>0</sortorder>
<drawtype>0</drawtype>
<color>C80000</color>
<yaxisside>0</yaxisside>
<calc_fnc>2</calc_fnc>
<type>0</type>
<item>
<host>nvidia_sensors</host>
<key>gpu.fanspeed[0]</key>
</item>
</graph_item>
<graph_item>
<sortorder>1</sortorder>
<drawtype>0</drawtype>
<color>00C800</color>
<yaxisside>0</yaxisside>
<calc_fnc>2</calc_fnc>
<type>0</type>
<item>
<host>nvidia_sensors</host>
<key>gpu.fanspeed[1]</key>
</item>
</graph_item>
</graph_items>
</graph>
<graph>
<name>GPU Memory</name>
<width>900</width>
<height>200</height>
<yaxismin>0.0000</yaxismin>
<yaxismax>100.0000</yaxismax>
<show_work_period>1</show_work_period>
<show_triggers>1</show_triggers>
<type>0</type>
<show_legend>1</show_legend>
<show_3d>0</show_3d>
<percent_left>0.0000</percent_left>
<percent_right>0.0000</percent_right>
<ymin_type_1>0</ymin_type_1>
<ymax_type_1>0</ymax_type_1>
<ymin_item_1>0</ymin_item_1>
<ymax_item_1>0</ymax_item_1>
<graph_items>
<graph_item>
<sortorder>0</sortorder>
<drawtype>0</drawtype>
<color>C80000</color>
<yaxisside>0</yaxisside>
<calc_fnc>2</calc_fnc>
<type>0</type>
<item>
<host>nvidia_sensors</host>
<key>gpu.free[0]</key>
</item>
</graph_item>
<graph_item>
<sortorder>1</sortorder>
<drawtype>0</drawtype>
<color>00C800</color>
<yaxisside>0</yaxisside>
<calc_fnc>2</calc_fnc>
<type>0</type>
<item>
<host>nvidia_sensors</host>
<key>gpu.memtotal[0]</key>
</item>
</graph_item>
<graph_item>
<sortorder>2</sortorder>
<drawtype>0</drawtype>
<color>0000C8</color>
<yaxisside>0</yaxisside>
<calc_fnc>2</calc_fnc>
<type>0</type>
<item>
<host>nvidia_sensors</host>
<key>gpu.used[0]</key>
</item>
</graph_item>
<graph_item>
<sortorder>3</sortorder>
<drawtype>0</drawtype>
<color>C800C8</color>
<yaxisside>0</yaxisside>
<calc_fnc>2</calc_fnc>
<type>0</type>
<item>
<host>nvidia_sensors</host>
<key>gpu.free[1]</key>
</item>
</graph_item>
<graph_item>
<sortorder>4</sortorder>
<drawtype>0</drawtype>
<color>00C8C8</color>
<yaxisside>0</yaxisside>
<calc_fnc>2</calc_fnc>
<type>0</type>
<item>
<host>nvidia_sensors</host>
<key>gpu.memtotal[1]</key>
</item>
</graph_item>
<graph_item>
<sortorder>5</sortorder>
<drawtype>0</drawtype>
<color>C8C800</color>
<yaxisside>0</yaxisside>
<calc_fnc>2</calc_fnc>
<type>0</type>
<item>
<host>nvidia_sensors</host>
<key>gpu.used[1]</key>
</item>
</graph_item>
</graph_items>
</graph>
<graph>
<name>GPU Temps</name>
<width>900</width>
<height>200</height>
<yaxismin>0.0000</yaxismin>
<yaxismax>100.0000</yaxismax>
<show_work_period>1</show_work_period>
<show_triggers>1</show_triggers>
<type>0</type>
<show_legend>1</show_legend>
<show_3d>0</show_3d>
<percent_left>0.0000</percent_left>
<percent_right>0.0000</percent_right>
<ymin_type_1>0</ymin_type_1>
<ymax_type_1>0</ymax_type_1>
<ymin_item_1>0</ymin_item_1>
<ymax_item_1>0</ymax_item_1>
<graph_items>
<graph_item>
<sortorder>0</sortorder>
<drawtype>0</drawtype>
<color>C80000</color>
<yaxisside>0</yaxisside>
<calc_fnc>2</calc_fnc>
<type>0</type>
<item>
<host>nvidia_sensors</host>
<key>gpu.temp[0]</key>
</item>
</graph_item>
<graph_item>
<sortorder>1</sortorder>
<drawtype>0</drawtype>
<color>00C800</color>
<yaxisside>0</yaxisside>
<calc_fnc>2</calc_fnc>
<type>0</type>
<item>
<host>nvidia_sensors</host>
<key>gpu.temp[1]</key>
</item>
</graph_item>
</graph_items>
</graph>
</graphs>
</zabbix_export>
UserParameter=sensors.temp, sudo sensors | grep "Physical id 0:" | awk '{ print $4+0 }'
UserParameter=gpu.temp[*],nvidia-smi -q --gpu=$1 |grep "GPU Current Temp"|grep -v "N/A"|cut -c 38-40
UserParameter=gpu.memtotal[*],nvidia-smi -q --gpu=$1 |grep Total|cut -c 38-50|grep -o '[0-9]*'
UserParameter=gpu.used[*],nvidia-smi -q --gpu=$1 |grep Used|cut -c 38-50|grep -o '[0-9]*'
UserParameter=gpu.free[*],nvidia-smi -q --gpu=$1 |grep Free|cut -c 38-50|grep -o '[0-9]*'
UserParameter=gpu.fanspeed[*],nvidia-smi -q --gpu=$1 |grep Fan|cut -c 38-50|grep -o '[0-9]*'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment