From 3389a76c411d7c3bf338ca3acbad7f6bd70a23a7 Mon Sep 17 00:00:00 2001 From: Ruslan Gilfanov Date: Sun, 21 Jun 2026 22:45:25 +0300 Subject: [PATCH] =?UTF-8?q?monitoring:=20=D1=81=D0=B5=D1=82=D0=B5=D0=B2?= =?UTF-8?q?=D0=BE=D0=B9=20=D0=B4=D0=B0=D1=88=D0=B1=D0=BE=D1=80=D0=B4=20+?= =?UTF-8?q?=20node-exporter=20=D0=B2=20host-network=20(=D0=B2=D0=B8=D0=B4?= =?UTF-8?q?=D0=B8=D1=82=20eth0/wg0)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- servers/fr1.md | 2 + stacks/monitoring/docker-compose.yml | 9 ++- .../provisioning/dashboards/network.json | 78 +++++++++++++++++++ .../prometheus/targets/node/fr1.yml | 2 +- 4 files changed, 87 insertions(+), 4 deletions(-) create mode 100644 stacks/monitoring/grafana/provisioning/dashboards/network.json diff --git a/servers/fr1.md b/servers/fr1.md index d8aa98e..e04e8e6 100644 --- a/servers/fr1.md +++ b/servers/fr1.md @@ -69,6 +69,8 @@ backend=systemd banaction=ufw - Prometheus: retention 90d, слушает `127.0.0.1:9090`. Скрейпит node-exporter + cadvisor (job'ы `node`/`cadvisor`, instance `fr1`). - Grafana: `127.0.0.1:3000`, доступ снаружи через **https://mon.ruzzy.dev** (nginx + Let's Encrypt). Datasource Prometheus (uid `prometheus`) и дашборды (Node Exporter Full, cAdvisor, папка Infra) провижатся автоматически. Пароль admin — в `~/monitoring/docker-compose.yml` на сервере (в репо вынесен в `${GF_ADMIN_PASSWORD}`). - **Multi-server:** таргеты через file_sd (`prometheus/targets/{node,cadvisor}/*.yml`) — новый сервер = добавить файл + `curl -X POST .../-/reload`. См. `stacks/monitoring/README.md`. +- **node-exporter — `network_mode: host`** (видит реальные интерфейсы хоста: eth0, wg0-туннель и т.д.; иначе показывал бы только сеть docker-бриджа). Слушает host:9100, доступ ограничен ufw (`allow from 172.16.0.0/12 to any port 9100`), Prometheus скрейпит через `host.docker.internal:9100` (имя резолвится благодаря `extra_hosts: host.docker.internal:host-gateway` у сервиса prometheus). nodename берётся с хоста (fr1). 
+- Дашборд **Network / Traffic** (`network.json`): RX/TX по интерфейсам, пакеты, ошибки/дропы, статус — с фильтром по instance/device (`lo`, `veth*`, `br-*`, `docker*` скрыты). ⚠️ Чтобы дашборд показывал реальные интерфейсы `de1`, его node-exporter тоже надо перевести в host-network (пока bridge). - DNS: `mon.ruzzy.dev` → 161.97.93.252. - Планируется: VPN-экспортёры (wireguard/openvpn/telemt) + подключение `de1` по туннелю. diff --git a/stacks/monitoring/docker-compose.yml b/stacks/monitoring/docker-compose.yml index 7ad6849..abfd6d8 100644 --- a/stacks/monitoring/docker-compose.yml +++ b/stacks/monitoring/docker-compose.yml @@ -15,6 +15,8 @@ services: - ./prometheus/targets:/etc/prometheus/targets:ro - ./prometheus/rules:/etc/prometheus/rules:ro - prometheus_data:/prometheus + extra_hosts: + - "host.docker.internal:host-gateway" ports: - '127.0.0.1:9090:9090' networks: [monitoring] @@ -48,10 +50,13 @@ services: networks: [monitoring] depends_on: [prometheus] + # node-exporter в host-network: видит реальные интерфейсы хоста (eth0, wg0, tun…). + # Слушает host:9100; доступ ограничен ufw (только docker-сети). Скрейпится через host.docker.internal. 
node-exporter: image: prom/node-exporter:latest container_name: node-exporter - hostname: fr1 + network_mode: host + pid: host restart: unless-stopped command: - '--path.procfs=/host/proc' @@ -62,8 +67,6 @@ services: - /proc:/host/proc:ro - /sys:/host/sys:ro - /:/rootfs:ro - pid: host - networks: [monitoring] cadvisor: image: gcr.io/cadvisor/cadvisor:latest diff --git a/stacks/monitoring/grafana/provisioning/dashboards/network.json b/stacks/monitoring/grafana/provisioning/dashboards/network.json new file mode 100644 index 0000000..07c3b2b --- /dev/null +++ b/stacks/monitoring/grafana/provisioning/dashboards/network.json @@ -0,0 +1,78 @@ +{ + "uid": "network-traffic", + "title": "Network / Traffic", + "tags": ["network", "infra"], + "timezone": "browser", + "schemaVersion": 39, + "version": 1, + "refresh": "30s", + "time": { "from": "now-3h", "to": "now" }, + "templating": { + "list": [ + { + "name": "instance", "type": "query", + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "query": { "query": "label_values(node_network_receive_bytes_total, instance)", "refId": "v" }, + "refresh": 2, "includeAll": false, "current": { "text": "fr1", "value": "fr1" } + }, + { + "name": "device", "type": "query", + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "query": { "query": "label_values(node_network_receive_bytes_total{instance=~\"$instance\",device!~\"lo|veth.*|br-.*|docker.*\"}, device)", "refId": "v" }, + "refresh": 2, "includeAll": true, "multi": true, "current": { "text": "All", "value": "$__all" } + } + ] + }, + "panels": [ + { + "type": "timeseries", "title": "Входящий трафик (RX)", "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 }, + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { "defaults": { "unit": "bps", "custom": { "drawStyle": "line", "lineWidth": 2, "fillOpacity": 10 } } }, + "targets": [ { "expr": 
"rate(node_network_receive_bytes_total{instance=~\"$instance\",device=~\"$device\"}[$__rate_interval])*8", "legendFormat": "{{device}}", "refId": "A" } ] + }, + { + "type": "timeseries", "title": "Исходящий трафик (TX)", "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 }, + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { "defaults": { "unit": "bps", "custom": { "drawStyle": "line", "lineWidth": 2, "fillOpacity": 10 } } }, + "targets": [ { "expr": "rate(node_network_transmit_bytes_total{instance=~\"$instance\",device=~\"$device\"}[$__rate_interval])*8", "legendFormat": "{{device}}", "refId": "A" } ] + }, + { + "type": "stat", "title": "RX суммарно (сейчас)", "gridPos": { "h": 6, "w": 6, "x": 0, "y": 8 }, + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { "defaults": { "unit": "bps", "color": { "mode": "thresholds" }, "thresholds": { "steps": [ { "color": "green", "value": null } ] } } }, + "targets": [ { "expr": "sum(rate(node_network_receive_bytes_total{instance=~\"$instance\",device=~\"$device\"}[$__rate_interval])*8)", "refId": "A" } ] + }, + { + "type": "stat", "title": "TX суммарно (сейчас)", "gridPos": { "h": 6, "w": 6, "x": 6, "y": 8 }, + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { "defaults": { "unit": "bps", "color": { "mode": "thresholds" }, "thresholds": { "steps": [ { "color": "blue", "value": null } ] } } }, + "targets": [ { "expr": "sum(rate(node_network_transmit_bytes_total{instance=~\"$instance\",device=~\"$device\"}[$__rate_interval])*8)", "refId": "A" } ] + }, + { + "type": "timeseries", "title": "Пакеты/с (RX +, TX −)", "gridPos": { "h": 6, "w": 12, "x": 12, "y": 8 }, + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { "defaults": { "unit": "pps", "custom": { "drawStyle": "line", "lineWidth": 1, "fillOpacity": 5 } } }, + "targets": [ + { "expr": 
"rate(node_network_receive_packets_total{instance=~\"$instance\",device=~\"$device\"}[$__rate_interval])", "legendFormat": "rx {{device}}", "refId": "A" }, + { "expr": "-rate(node_network_transmit_packets_total{instance=~\"$instance\",device=~\"$device\"}[$__rate_interval])", "legendFormat": "tx {{device}}", "refId": "B" } + ] + }, + { + "type": "timeseries", "title": "Ошибки и дропы /с", "gridPos": { "h": 7, "w": 12, "x": 0, "y": 14 }, + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { "defaults": { "unit": "pps", "custom": { "drawStyle": "line", "lineWidth": 2, "fillOpacity": 10 } } }, + "targets": [ + { "expr": "rate(node_network_receive_errs_total{instance=~\"$instance\",device=~\"$device\"}[$__rate_interval])", "legendFormat": "rx errs {{device}}", "refId": "A" }, + { "expr": "rate(node_network_transmit_errs_total{instance=~\"$instance\",device=~\"$device\"}[$__rate_interval])", "legendFormat": "tx errs {{device}}", "refId": "B" }, + { "expr": "rate(node_network_receive_drop_total{instance=~\"$instance\",device=~\"$device\"}[$__rate_interval])", "legendFormat": "rx drop {{device}}", "refId": "C" }, + { "expr": "rate(node_network_transmit_drop_total{instance=~\"$instance\",device=~\"$device\"}[$__rate_interval])", "legendFormat": "tx drop {{device}}", "refId": "D" } + ] + }, + { + "type": "timeseries", "title": "Статус интерфейсов (up=1)", "gridPos": { "h": 7, "w": 12, "x": 12, "y": 14 }, + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { "defaults": { "unit": "short", "max": 1, "min": 0, "custom": { "drawStyle": "line", "lineWidth": 2, "fillOpacity": 0 } } }, + "targets": [ { "expr": "node_network_up{instance=~\"$instance\",device=~\"$device\"}", "legendFormat": "{{device}}", "refId": "A" } ] + } + ] +} diff --git a/stacks/monitoring/prometheus/targets/node/fr1.yml b/stacks/monitoring/prometheus/targets/node/fr1.yml index a14ad82..d6ca4dd 100644 --- 
a/stacks/monitoring/prometheus/targets/node/fr1.yml +++ b/stacks/monitoring/prometheus/targets/node/fr1.yml @@ -1,3 +1,3 @@ -- targets: ['node-exporter:9100'] +- targets: ["host.docker.internal:9100"] labels: instance: fr1