---
# Prometheus alerting rules: one rule group, "host", covering scrape-target
# availability plus CPU, memory and filesystem usage (node_* metrics).
#
# Each rule fires only after its `expr` has held continuously for the `for`
# duration. The `severity` label is attached to the resulting alert, and the
# `summary` annotation is a Go template rendered with the alert's labels
# ($labels) and the value of the expression ($value).
groups:
  - name: host
    rules:
      # A scrape target has reported up == 0 for 2 minutes.
      - alert: TargetDown
        expr: up == 0
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: "Таргет {{ $labels.job }} на {{ $labels.instance }} недоступен"

      # CPU usage per instance, computed as 100 minus the average idle-mode
      # rate over 5m (scaled to percent), has been above 90 for 10 minutes.
      - alert: HostHighCPU
        expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 90
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "Высокий CPU на {{ $labels.instance }}: {{ printf \"%.0f\" $value }}%"

      # MemAvailable as a percentage of MemTotal has been below 10 for
      # 5 minutes.
      - alert: HostLowMemory
        expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Мало памяти на {{ $labels.instance }}: доступно {{ printf \"%.0f\" $value }}%"

      # Available bytes as a percentage of filesystem size has been below 15
      # for 5 minutes; tmpfs, overlay and squashfs filesystems are excluded.
      # The folded scalar (>-) joins the lines below with single spaces, so
      # Prometheus receives the expression as one line.
      # NOTE: this condition is also true whenever HostDiskCritical's is, so
      # both alerts fire together once free space drops below 5%.
      - alert: HostDiskLow
        expr: >-
          node_filesystem_avail_bytes{fstype!~"tmpfs|overlay|squashfs"}
          / node_filesystem_size_bytes{fstype!~"tmpfs|overlay|squashfs"}
          * 100 < 15
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Кончается диск на {{ $labels.instance }} ({{ $labels.mountpoint }}): свободно {{ printf \"%.0f\" $value }}%"

      # Same ratio and filesystem filter as HostDiskLow, with a threshold of 5
      # and critical severity.
      - alert: HostDiskCritical
        expr: >-
          node_filesystem_avail_bytes{fstype!~"tmpfs|overlay|squashfs"}
          / node_filesystem_size_bytes{fstype!~"tmpfs|overlay|squashfs"}
          * 100 < 5
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "КРИТИЧНО: диск почти полон на {{ $labels.instance }} ({{ $labels.mountpoint }}): свободно {{ printf \"%.0f\" $value }}%"