/usr/lib/netdata/conf.d/health.d
# you can disable an alarm notification by setting the 'to' line to: silent # ----------------------------------------------------------------------------- # low disk space # checking the latest collected values # raise an alarm if the disk is low on # available disk space template: disk_space_usage on: disk.space class: Utilization type: System component: Disk chart labels: mount_point=!/dev !/dev/* !/run !/run/* !HarddiskVolume* * calc: $used * 100 / ($avail + $used) units: % every: 1m warn: $this > (($status >= $WARNING ) ? (80) : (90)) crit: ($this > (($status == $CRITICAL) ? (90) : (98))) && $avail < 5 delay: up 1m down 15m multiplier 1.5 max 1h summary: Disk ${label:mount_point} space usage info: Total space utilization of disk ${label:mount_point} to: sysadmin template: disk_inode_usage on: disk.inodes class: Utilization type: System component: Disk chart labels: mount_point=!/dev !/dev/* !/run !/run/* * calc: $used * 100 / ($avail + $used) units: % every: 1m warn: $this > (($status >= $WARNING) ? (80) : (90)) crit: $this > (($status == $CRITICAL) ? (90) : (98)) delay: up 1m down 15m multiplier 1.5 max 1h summary: Disk ${label:mount_point} inode usage info: Total inode utilization of disk ${label:mount_point} to: sysadmin # ----------------------------------------------------------------------------- # disk fill rate # calculate the rate the disk fills # use as base, the available space change # during the last hour # this is just a calculation - it has no alarm # we will use it in the next template to find # the hours remaining template: disk_fill_rate on: disk.space lookup: min -10m at -50m unaligned of avail calc: ($this - $avail) / (($now - $after) / 3600) every: 1m units: GB/hour info: average rate the disk fills up (positive), or frees up (negative) space, for the last hour # calculate the hours remaining # if the disk continues to fill in this rate template: out_of_disk_space_time on: disk.space calc: ($disk_fill_rate > 0) ? ($avail / $disk_fill_rate) : (inf) units: hours every: 10s warn: $this > 0 and $this < (($status >= $WARNING) ? (48) : (8)) crit: $this > 0 and $this < (($status == $CRITICAL) ? (24) : (2)) delay: down 15m multiplier 1.2 max 1h summary: Disk ${label:mount_point} estimation of lack of space info: Estimated time the disk ${label:mount_point} will run out of space, if the system continues to add data with the rate of the last hour to: silent # ----------------------------------------------------------------------------- # disk inode fill rate # calculate the rate the disk inodes are allocated # use as base, the available inodes change # during the last hour # this is just a calculation - it has no alarm # we will use it in the next template to find # the hours remaining template: disk_inode_rate on: disk.inodes lookup: min -10m at -50m unaligned of avail calc: ($this - $avail) / (($now - $after) / 3600) every: 1m units: inodes/hour info: average rate at which disk inodes are allocated (positive), or freed (negative), for the last hour # calculate the hours remaining # if the disk inodes are allocated # in this rate template: out_of_disk_inodes_time on: disk.inodes calc: ($disk_inode_rate > 0) ? ($avail / $disk_inode_rate) : (inf) units: hours every: 10s warn: $this > 0 and $this < (($status >= $WARNING) ? (48) : (8)) crit: $this > 0 and $this < (($status == $CRITICAL) ? (24) : (2)) delay: down 15m multiplier 1.2 max 1h summary: Disk ${label:mount_point} estimation of lack of inodes info: Estimated time the disk ${label:mount_point} will run out of inodes, if the system continues to allocate inodes with the rate of the last hour to: silent # ----------------------------------------------------------------------------- # disk congestion # raise an alarm if the disk is congested # by calculating the average disk utilization # for the last 10 minutes template: 10min_disk_utilization on: disk.util class: Utilization type: System component: Disk lookup: average -10m unaligned units: % every: 1m warn: $this > 98 * (($status >= $WARNING) ? (0.7) : (1)) delay: down 15m multiplier 1.2 max 1h summary: Disk ${label:device} utilization info: Average percentage of time ${label:device} disk was busy over the last 10 minutes to: silent # raise an alarm if the disk backlog # is above 1000ms (1s) per second # for 10 minutes # (i.e. the disk cannot catch up) template: 10min_disk_backlog on: disk.backlog class: Latency type: System component: Disk lookup: average -10m unaligned units: ms every: 1m warn: $this > 5000 * (($status >= $WARNING) ? (0.7) : (1)) delay: down 15m multiplier 1.2 max 1h summary: Disk ${label:device} backlog info: Average backlog size of the ${label:device} disk over the last 10 minutes to: silent
.
Edit
..
Edit
adaptec_raid.conf
Edit
apcupsd.conf
Edit
as400.conf
Edit
bcache.conf
Edit
beanstalkd.conf
Edit
boinc.conf
Edit
btrfs.conf
Edit
ceph.conf
Edit
cgroups.conf
Edit
clickhouse.conf
Edit
cockroachdb.conf
Edit
consul.conf
Edit
cpu.conf
Edit
db2.conf
Edit
dbengine.conf
Edit
disks.conf
Edit
dns_query.conf
Edit
dnsmasq_dhcp.conf
Edit
docker.conf
Edit
elasticsearch.conf
Edit
entropy.conf
Edit
exporting.conf
Edit
file_descriptors.conf
Edit
gearman.conf
Edit
geth.conf
Edit
go.d.plugin.conf
Edit
haproxy.conf
Edit
hdfs.conf
Edit
httpcheck.conf
Edit
ioping.conf
Edit
ipc.conf
Edit
ipfs.conf
Edit
ipmi.conf
Edit
isc_dhcpd.conf
Edit
k8sstate.conf
Edit
kubelet.conf
Edit
load.conf
Edit
lvm.conf
Edit
mdstat.conf
Edit
megacli.conf
Edit
memcached.conf
Edit
memory.conf
Edit
ml.conf
Edit
mq.conf
Edit
mysql.conf
Edit
net.conf
Edit
netfilter.conf
Edit
nvme.conf
Edit
pihole.conf
Edit
ping.conf
Edit
plugin.conf
Edit
portcheck.conf
Edit
postgres.conf
Edit
power_supply_capacity.conf
Edit
processes.conf
Edit
proxysql.conf
Edit
python.d.plugin.conf
Edit
qos.conf
Edit
rabbitmq.conf
Edit
ram.conf
Edit
reboot.conf
Edit
redis.conf
Edit
retroshare.conf
Edit
riakkv.conf
Edit
scaleio.conf
Edit
softnet.conf
Edit
storcli.conf
Edit
streaming.conf
Edit
swap.conf
Edit
synchronization.conf
Edit
systemdunits.conf
Edit
tcp_conn.conf
Edit
tcp_listen.conf
Edit
tcp_mem.conf
Edit
tcp_orphans.conf
Edit
tcp_resets.conf
Edit
timex.conf
Edit
udp_errors.conf
Edit
unbound.conf
Edit
upsd.conf
Edit
vcsa.conf
Edit
vernemq.conf
Edit
vsphere.conf
Edit
web_log.conf
Edit
websphere_jmx.conf
Edit
websphere_mp.conf
Edit
websphere_pmi.conf
Edit
whoisquery.conf
Edit
x509check.conf
Edit
zfs.conf
Edit