monitoring: сетевой дашборд + node-exporter в host-network (видит eth0/wg0)
This commit is contained in:
@@ -69,6 +69,8 @@ backend=systemd banaction=ufw
|
|||||||
- Prometheus: retention 90d, слушает `127.0.0.1:9090`. Скрейпит node-exporter + cadvisor (job'ы `node`/`cadvisor`, instance `fr1`).
|
- Prometheus: retention 90d, слушает `127.0.0.1:9090`. Скрейпит node-exporter + cadvisor (job'ы `node`/`cadvisor`, instance `fr1`).
|
||||||
- Grafana: `127.0.0.1:3000`, доступ снаружи через **https://mon.ruzzy.dev** (nginx + Let's Encrypt). Datasource Prometheus (uid `prometheus`) и дашборды (Node Exporter Full, cAdvisor, папка Infra) провижатся автоматически. Пароль admin — в `~/monitoring/docker-compose.yml` на сервере (в репо вынесен в `${GF_ADMIN_PASSWORD}`).
|
- Grafana: `127.0.0.1:3000`, доступ снаружи через **https://mon.ruzzy.dev** (nginx + Let's Encrypt). Datasource Prometheus (uid `prometheus`) и дашборды (Node Exporter Full, cAdvisor, папка Infra) провижатся автоматически. Пароль admin — в `~/monitoring/docker-compose.yml` на сервере (в репо вынесен в `${GF_ADMIN_PASSWORD}`).
|
||||||
- **Multi-server:** таргеты через file_sd (`prometheus/targets/{node,cadvisor}/*.yml`) — новый сервер = добавить файл + `curl -X POST .../-/reload`. См. `stacks/monitoring/README.md`.
|
- **Multi-server:** таргеты через file_sd (`prometheus/targets/{node,cadvisor}/*.yml`) — новый сервер = добавить файл + `curl -X POST .../-/reload`. См. `stacks/monitoring/README.md`.
|
||||||
|
- **node-exporter — `network_mode: host`** (видит реальные интерфейсы хоста: eth0, wg0-туннель и т.д.; в bridge-режиме показывал бы только сетевые интерфейсы самого контейнера). Слушает порт 9100 на хосте; доступ ограничен ufw (`allow from 172.16.0.0/12 to any port 9100`), Prometheus скрейпит через `host.docker.internal:9100`. `nodename` берётся с хоста (fr1).
|
||||||
|
- Дашборд **Network / Traffic** (`network.json`): RX/TX по интерфейсам, пакеты, ошибки/дропы, статус — с фильтром по instance/device (lo, veth*, br-*, docker* скрыты). ⚠️ Чтобы на `de1` отображались реальные интерфейсы хоста, его агент тоже надо перевести в host-network (пока работает в bridge).
|
||||||
- DNS: `mon.ruzzy.dev` → 161.97.93.252.
|
- DNS: `mon.ruzzy.dev` → 161.97.93.252.
|
||||||
- Планируется: VPN-экспортёры (wireguard/openvpn/telemt) + подключение `de1` по туннелю.
|
- Планируется: VPN-экспортёры (wireguard/openvpn/telemt) + подключение `de1` по туннелю.
|
||||||
|
|
||||||
|
|||||||
@@ -15,6 +15,8 @@ services:
|
|||||||
- ./prometheus/targets:/etc/prometheus/targets:ro
|
- ./prometheus/targets:/etc/prometheus/targets:ro
|
||||||
- ./prometheus/rules:/etc/prometheus/rules:ro
|
- ./prometheus/rules:/etc/prometheus/rules:ro
|
||||||
- prometheus_data:/prometheus
|
- prometheus_data:/prometheus
|
||||||
|
extra_hosts:
|
||||||
|
- "host.docker.internal:host-gateway"
|
||||||
ports:
|
ports:
|
||||||
- '127.0.0.1:9090:9090'
|
- '127.0.0.1:9090:9090'
|
||||||
networks: [monitoring]
|
networks: [monitoring]
|
||||||
@@ -48,10 +50,13 @@ services:
|
|||||||
networks: [monitoring]
|
networks: [monitoring]
|
||||||
depends_on: [prometheus]
|
depends_on: [prometheus]
|
||||||
|
|
||||||
|
# node-exporter в host-network: видит реальные интерфейсы хоста (eth0, wg0, tun…).
|
||||||
|
# Слушает host:9100; доступ ограничен ufw (только docker-сети). Скрейпится через host.docker.internal.
|
||||||
node-exporter:
|
node-exporter:
|
||||||
image: prom/node-exporter:latest
|
image: prom/node-exporter:latest
|
||||||
container_name: node-exporter
|
container_name: node-exporter
|
||||||
hostname: fr1
|
network_mode: host
|
||||||
|
pid: host
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
command:
|
command:
|
||||||
- '--path.procfs=/host/proc'
|
- '--path.procfs=/host/proc'
|
||||||
@@ -62,8 +67,6 @@ services:
|
|||||||
- /proc:/host/proc:ro
|
- /proc:/host/proc:ro
|
||||||
- /sys:/host/sys:ro
|
- /sys:/host/sys:ro
|
||||||
- /:/rootfs:ro
|
- /:/rootfs:ro
|
||||||
pid: host
|
|
||||||
networks: [monitoring]
|
|
||||||
|
|
||||||
cadvisor:
|
cadvisor:
|
||||||
image: gcr.io/cadvisor/cadvisor:latest
|
image: gcr.io/cadvisor/cadvisor:latest
|
||||||
|
|||||||
@@ -0,0 +1,78 @@
|
|||||||
|
{
|
||||||
|
"uid": "network-traffic",
|
||||||
|
"title": "Network / Traffic",
|
||||||
|
"tags": ["network", "infra"],
|
||||||
|
"timezone": "browser",
|
||||||
|
"schemaVersion": 39,
|
||||||
|
"version": 1,
|
||||||
|
"refresh": "30s",
|
||||||
|
"time": { "from": "now-3h", "to": "now" },
|
||||||
|
"templating": {
|
||||||
|
"list": [
|
||||||
|
{
|
||||||
|
"name": "instance", "type": "query",
|
||||||
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||||
|
"query": { "query": "label_values(node_network_receive_bytes_total, instance)", "refId": "v" },
|
||||||
|
"refresh": 2, "includeAll": false, "current": { "text": "fr1", "value": "fr1" }
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "device", "type": "query",
|
||||||
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||||
|
"query": { "query": "label_values(node_network_receive_bytes_total{instance=~\"$instance\",device!~\"lo|veth.*|br-.*|docker.*\"}, device)", "refId": "v" },
|
||||||
|
"refresh": 2, "includeAll": true, "multi": true, "current": { "text": "All", "value": "$__all" }
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"type": "timeseries", "title": "Входящий трафик (RX)", "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
|
||||||
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||||
|
"fieldConfig": { "defaults": { "unit": "bps", "custom": { "drawStyle": "line", "lineWidth": 2, "fillOpacity": 10 } } },
|
||||||
|
"targets": [ { "expr": "rate(node_network_receive_bytes_total{instance=~\"$instance\",device=~\"$device\"}[$__rate_interval])*8", "legendFormat": "{{device}}", "refId": "A" } ]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "timeseries", "title": "Исходящий трафик (TX)", "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
|
||||||
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||||
|
"fieldConfig": { "defaults": { "unit": "bps", "custom": { "drawStyle": "line", "lineWidth": 2, "fillOpacity": 10 } } },
|
||||||
|
"targets": [ { "expr": "rate(node_network_transmit_bytes_total{instance=~\"$instance\",device=~\"$device\"}[$__rate_interval])*8", "legendFormat": "{{device}}", "refId": "A" } ]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "stat", "title": "RX суммарно (сейчас)", "gridPos": { "h": 6, "w": 6, "x": 0, "y": 8 },
|
||||||
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||||
|
"fieldConfig": { "defaults": { "unit": "bps", "color": { "mode": "thresholds" }, "thresholds": { "steps": [ { "color": "green", "value": null } ] } } },
|
||||||
|
"targets": [ { "expr": "sum(rate(node_network_receive_bytes_total{instance=~\"$instance\",device=~\"$device\"}[$__rate_interval])*8)", "refId": "A" } ]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "stat", "title": "TX суммарно (сейчас)", "gridPos": { "h": 6, "w": 6, "x": 6, "y": 8 },
|
||||||
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||||
|
"fieldConfig": { "defaults": { "unit": "bps", "color": { "mode": "thresholds" }, "thresholds": { "steps": [ { "color": "blue", "value": null } ] } } },
|
||||||
|
"targets": [ { "expr": "sum(rate(node_network_transmit_bytes_total{instance=~\"$instance\",device=~\"$device\"}[$__rate_interval])*8)", "refId": "A" } ]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "timeseries", "title": "Пакеты/с (RX +, TX −)", "gridPos": { "h": 6, "w": 12, "x": 12, "y": 8 },
|
||||||
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||||
|
"fieldConfig": { "defaults": { "unit": "pps", "custom": { "drawStyle": "line", "lineWidth": 1, "fillOpacity": 5 } } },
|
||||||
|
"targets": [
|
||||||
|
{ "expr": "rate(node_network_receive_packets_total{instance=~\"$instance\",device=~\"$device\"}[$__rate_interval])", "legendFormat": "rx {{device}}", "refId": "A" },
|
||||||
|
{ "expr": "-rate(node_network_transmit_packets_total{instance=~\"$instance\",device=~\"$device\"}[$__rate_interval])", "legendFormat": "tx {{device}}", "refId": "B" }
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "timeseries", "title": "Ошибки и дропы /с", "gridPos": { "h": 7, "w": 12, "x": 0, "y": 14 },
|
||||||
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||||
|
"fieldConfig": { "defaults": { "unit": "pps", "custom": { "drawStyle": "line", "lineWidth": 2, "fillOpacity": 10 } } },
|
||||||
|
"targets": [
|
||||||
|
{ "expr": "rate(node_network_receive_errs_total{instance=~\"$instance\",device=~\"$device\"}[$__rate_interval])", "legendFormat": "rx errs {{device}}", "refId": "A" },
|
||||||
|
{ "expr": "rate(node_network_transmit_errs_total{instance=~\"$instance\",device=~\"$device\"}[$__rate_interval])", "legendFormat": "tx errs {{device}}", "refId": "B" },
|
||||||
|
{ "expr": "rate(node_network_receive_drop_total{instance=~\"$instance\",device=~\"$device\"}[$__rate_interval])", "legendFormat": "rx drop {{device}}", "refId": "C" },
|
||||||
|
{ "expr": "rate(node_network_transmit_drop_total{instance=~\"$instance\",device=~\"$device\"}[$__rate_interval])", "legendFormat": "tx drop {{device}}", "refId": "D" }
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "timeseries", "title": "Статус интерфейсов (up=1)", "gridPos": { "h": 7, "w": 12, "x": 12, "y": 14 },
|
||||||
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||||
|
"fieldConfig": { "defaults": { "unit": "short", "max": 1, "min": 0, "custom": { "drawStyle": "line", "lineWidth": 2, "fillOpacity": 0 } } },
|
||||||
|
"targets": [ { "expr": "node_network_up{instance=~\"$instance\",device=~\"$device\"}", "legendFormat": "{{device}}", "refId": "A" } ]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -1,3 +1,3 @@
|
|||||||
- targets: ['node-exporter:9100']
|
- targets: ["host.docker.internal:9100"]
|
||||||
labels:
|
labels:
|
||||||
instance: fr1
|
instance: fr1
|
||||||
|
|||||||
Reference in New Issue
Block a user