/usr/lib/netdata/conf.d/health.d
# you can disable an alarm notification by setting the 'to' line to: silent

# -----------------------------------------------------------------------------
# net traffic overflow

# helper template (no warn/crit): exposes $interface_speed to the two
# utilization templates below; evaluates to nan when $nic_speed_max is not positive
 template: interface_speed
       on: net.net
    class: Latency
     type: System
component: Network
     calc: ( $nic_speed_max > 0 ) ? ( $nic_speed_max / 1000) : ( nan )
    units: Mbit
    every: 10s
     info: Network interface ${label:device} current speed

# warning is raised above 90% and, once raised, held until utilization
# falls to 85% or below
 template: 1m_received_traffic_overflow
       on: net.net
    class: Workload
     type: System
component: Network
host labels: _os=linux windows
   lookup: average -1m unaligned absolute of received
     calc: ($interface_speed > 0) ? ($this * 100 / ($interface_speed * 1000)) : ( nan )
    units: %
    every: 10s
     warn: $this > (($status >= $WARNING) ? (85) : (90))
    delay: up 1m down 1m multiplier 1.5 max 1h
  summary: System network interface ${label:device} inbound utilization
     info: Average inbound utilization for the network interface ${label:device} over the last minute
       to: silent

 template: 1m_sent_traffic_overflow
       on: net.net
    class: Workload
     type: System
component: Network
host labels: _os=linux windows
   lookup: average -1m unaligned absolute of sent
     calc: ($interface_speed > 0) ? ($this * 100 / ($interface_speed * 1000)) : ( nan )
    units: %
    every: 10s
     warn: $this > (($status >= $WARNING) ? (85) : (90))
    delay: up 1m down 1m multiplier 1.5 max 1h
  summary: System network interface ${label:device} outbound utilization
     info: Average outbound utilization for the network interface ${label:device} over the last minute
       to: silent

# -----------------------------------------------------------------------------
# dropped packets

# check if an interface is dropping packets
# the alarm is checked every 1 minute
# and examines the last 10 minutes of data
#
# it is possible to have expected packet drops on an interface for some network configurations
# look at the Monitoring Network Interfaces section in the proc.plugin documentation for more information

# helper templates (no warn/crit): 10-minute packet totals used as the
# denominators of the dropped-packet ratios below
 template: net_interface_inbound_packets
       on: net.packets
    class: Workload
     type: System
component: Network
   lookup: sum -10m unaligned absolute of received
    units: packets
    every: 1m
  summary: Network interface ${label:device} received packets
     info: Received packets for the network interface ${label:device} in the last 10 minutes

 template: net_interface_outbound_packets
       on: net.packets
    class: Workload
     type: System
component: Network
   lookup: sum -10m unaligned absolute of sent
    units: packets
    every: 1m
  summary: Network interface ${label:device} sent packets
     info: Sent packets for the network interface ${label:device} in the last 10 minutes

# non-wireless interfaces (device does not match wl*): the ratio is reported
# as 0 unless more than 10000 packets were received in the window
 template: inbound_packets_dropped_ratio
       on: net.drops
    class: Errors
     type: System
component: Network
chart labels: device=!wl* *
   lookup: sum -10m unaligned absolute of inbound
     calc: (($net_interface_inbound_packets > 10000) ? ($this * 100 / $net_interface_inbound_packets) : (0))
    units: %
    every: 1m
     warn: $this >= 2
    delay: up 1m down 1h multiplier 1.5 max 2h
  summary: System network interface ${label:device} inbound drops
     info: Ratio of inbound dropped packets for the network interface ${label:device} over the last 10 minutes
       to: silent

# non-wireless interfaces: the ratio is reported as 0 unless more than
# 1000 packets were sent in the window
 template: outbound_packets_dropped_ratio
       on: net.drops
    class: Errors
     type: System
component: Network
chart labels: device=!wl* *
   lookup: sum -10m unaligned absolute of outbound
     calc: (($net_interface_outbound_packets > 1000) ? ($this * 100 / $net_interface_outbound_packets) : (0))
    units: %
    every: 1m
     warn: $this >= 2
    delay: up 1m down 1h multiplier 1.5 max 2h
  summary: System network interface ${label:device} outbound drops
     info: Ratio of outbound dropped packets for the network interface ${label:device} over the last 10 minutes
       to: silent

# wireless interfaces (device matches wl*) use a higher warning threshold (10%).
# The lookup selects the 'inbound' dimension, the same one that
# inbound_packets_dropped_ratio reads from net.drops; 'received' is a
# dimension of net.packets, not of net.drops.
 template: wifi_inbound_packets_dropped_ratio
       on: net.drops
    class: Errors
     type: System
component: Network
host labels: _os=linux
chart labels: device=wl*
   lookup: sum -10m unaligned absolute of inbound
     calc: (($net_interface_inbound_packets > 10000) ? ($this * 100 / $net_interface_inbound_packets) : (0))
    units: %
    every: 1m
     warn: $this >= 10
    delay: up 1m down 1h multiplier 1.5 max 2h
  summary: System network interface ${label:device} inbound drops ratio
     info: Ratio of inbound dropped packets for the network interface ${label:device} over the last 10 minutes
       to: silent

# The lookup selects the 'outbound' dimension, the same one that
# outbound_packets_dropped_ratio reads from net.drops; 'sent' is a
# dimension of net.packets, not of net.drops.
 template: wifi_outbound_packets_dropped_ratio
       on: net.drops
    class: Errors
     type: System
component: Network
host labels: _os=linux
chart labels: device=wl*
   lookup: sum -10m unaligned absolute of outbound
     calc: (($net_interface_outbound_packets > 1000) ? ($this * 100 / $net_interface_outbound_packets) : (0))
    units: %
    every: 1m
     warn: $this >= 10
    delay: up 1m down 1h multiplier 1.5 max 2h
  summary: System network interface ${label:device} outbound drops ratio
     info: Ratio of outbound dropped packets for the network interface ${label:device} over the last 10 minutes
       to: silent

# -----------------------------------------------------------------------------
# interface errors

 template: interface_inbound_errors
       on: net.errors
    class: Errors
     type: System
component: Network
host labels: _os=freebsd
   lookup: sum -10m unaligned absolute of inbound
    units: errors
    every: 1m
     warn: $this >= 5
    delay: down 1h multiplier 1.5 max 2h
  summary: System network interface ${label:device} inbound errors
     info: Number of inbound errors for the network interface ${label:device} in the last 10 minutes
       to: silent

 template: interface_outbound_errors
       on: net.errors
    class: Errors
     type: System
component: Network
host labels: _os=freebsd
   lookup: sum -10m unaligned absolute of outbound
    units: errors
    every: 1m
     warn: $this >= 5
    delay: down 1h multiplier 1.5 max 2h
  summary: System network interface ${label:device} outbound errors
     info: Number of outbound errors for the network interface ${label:device} in the last 10 minutes
       to: silent

# -----------------------------------------------------------------------------
# FIFO errors

# check if an interface is having FIFO
# buffer errors
# the alarm is checked every 1 minute
# and examines the last 10 minutes of data

# no 'of <dimension>' selector: the lookup is not restricted to one dimension
 template: 10min_fifo_errors
       on: net.fifo
    class: Errors
     type: System
component: Network
host labels: _os=linux
   lookup: sum -10m unaligned absolute
    units: errors
    every: 1m
     warn: $this > 0
    delay: down 1h multiplier 1.5 max 2h
  summary: System network interface ${label:device} FIFO errors
     info: Number of FIFO errors for the network interface ${label:device} in the last 10 minutes
       to: silent

# -----------------------------------------------------------------------------
# check for packet storms

# 1. calculate the rate packets are received in 1m: 1m_received_packets_rate
# 2. do the same for the last 10s
# 3. raise an alarm if the latter is more than 50x (warning) or 60x (critical)
#    the first; once raised, warning is held while above 2x and critical
#    while above 50x
# the 1m baseline is floored at 1000 packets/s, so a warning needs an
# average of more than 50000 packets/s over the last 10 seconds

 template: 1m_received_packets_rate
       on: net.packets
    class: Workload
     type: System
component: Network
   lookup: average -1m unaligned of received
    units: packets
    every: 10s
     info: Average number of packets received by the network interface ${label:device} over the last minute

 template: 10s_received_packets_storm
       on: net.packets
    class: Workload
     type: System
component: Network
   lookup: average -10s unaligned of received
     calc: $this * 100 / (($1m_received_packets_rate < 1000)?(1000):($1m_received_packets_rate))
    every: 10s
    units: %
     warn: $this > (($status >= $WARNING)?(200):(5000))
     crit: $this > (($status == $CRITICAL)?(5000):(6000))
  options: no-clear-notification
  summary: System network interface ${label:device} inbound packet storm
     info: Ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, \
           compared to the rate over the last minute
       to: silent

# -----------------------------------------------------------------------------
# output queue length

# no lookup/calc: the warning expression reads the chart variable $length directly
 template: network_interface_output_queue_length
       on: net.queue_length
    class: Errors
     type: System
component: Network
host labels: _os=windows
    units: packets
    every: 10s
     warn: $length > 2
    delay: up 1m down 1m multiplier 1.5 max 1h
  summary: System network interface ${label:device} output queue length
     info: The Output Queue Length on interface ${label:device} should be zero, otherwise there are delays and bottlenecks.
       to: silent
.
Edit
..
Edit
adaptec_raid.conf
Edit
apcupsd.conf
Edit
as400.conf
Edit
bcache.conf
Edit
beanstalkd.conf
Edit
boinc.conf
Edit
btrfs.conf
Edit
ceph.conf
Edit
cgroups.conf
Edit
clickhouse.conf
Edit
cockroachdb.conf
Edit
consul.conf
Edit
cpu.conf
Edit
db2.conf
Edit
dbengine.conf
Edit
disks.conf
Edit
dns_query.conf
Edit
dnsmasq_dhcp.conf
Edit
docker.conf
Edit
elasticsearch.conf
Edit
entropy.conf
Edit
exporting.conf
Edit
file_descriptors.conf
Edit
gearman.conf
Edit
geth.conf
Edit
go.d.plugin.conf
Edit
haproxy.conf
Edit
hdfs.conf
Edit
httpcheck.conf
Edit
ioping.conf
Edit
ipc.conf
Edit
ipfs.conf
Edit
ipmi.conf
Edit
isc_dhcpd.conf
Edit
k8sstate.conf
Edit
kubelet.conf
Edit
load.conf
Edit
lvm.conf
Edit
mdstat.conf
Edit
megacli.conf
Edit
memcached.conf
Edit
memory.conf
Edit
ml.conf
Edit
mq.conf
Edit
mysql.conf
Edit
net.conf
Edit
netfilter.conf
Edit
nvme.conf
Edit
pihole.conf
Edit
ping.conf
Edit
plugin.conf
Edit
portcheck.conf
Edit
postgres.conf
Edit
power_supply_capacity.conf
Edit
processes.conf
Edit
proxysql.conf
Edit
python.d.plugin.conf
Edit
qos.conf
Edit
rabbitmq.conf
Edit
ram.conf
Edit
reboot.conf
Edit
redis.conf
Edit
retroshare.conf
Edit
riakkv.conf
Edit
scaleio.conf
Edit
softnet.conf
Edit
storcli.conf
Edit
streaming.conf
Edit
swap.conf
Edit
synchronization.conf
Edit
systemdunits.conf
Edit
tcp_conn.conf
Edit
tcp_listen.conf
Edit
tcp_mem.conf
Edit
tcp_orphans.conf
Edit
tcp_resets.conf
Edit
timex.conf
Edit
udp_errors.conf
Edit
unbound.conf
Edit
upsd.conf
Edit
vcsa.conf
Edit
vernemq.conf
Edit
vsphere.conf
Edit
web_log.conf
Edit
websphere_jmx.conf
Edit
websphere_mp.conf
Edit
websphere_pmi.conf
Edit
whoisquery.conf
Edit
x509check.conf
Edit
zfs.conf
Edit