Precisei criar o docker-compose.yml e configurar o Alertmanager para receber os alertas.
Caso alguém queira testar:
# docker-compose.yml
# Local monitoring stack: Prometheus, a Node Exporter scrape target and
# Alertmanager. All three services join the default network Compose creates
# for the project, so they can reach each other by service name.
version: '3.8'

services:
  prometheus:
    image: prom/prometheus:latest
    container_name: prometheus
    ports:
      - "9090:9090"
    volumes:
      # Config and rule files are mounted read-only from the project directory.
      - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - ./alert.rules.yml:/etc/prometheus/alert.rules.yml:ro
      # Named volume so the TSDB data survives container re-creation.
      - prometheus_data:/prometheus
    command:
      - --config.file=/etc/prometheus/prometheus.yml
      - --storage.tsdb.path=/prometheus

  # Example target service (e.g., Node Exporter for host metrics)
  node-exporter:
    image: prom/node-exporter:latest
    container_name: node_exporter
    ports:
      - "9100:9100"

  # Receives the alerts fired by Prometheus. No config file is mounted here,
  # so the configuration shipped inside the image is used.
  alert-manager:
    image: prom/alertmanager:latest
    container_name: alertmanager
    ports:
      - "9093:9093"

# Named volumes referenced by the services above.
volumes:
  prometheus_data: {}
Arquivo prometheus.yml:
# prometheus.yml
# Prometheus server configuration for the Compose stack.
global:
  # Very aggressive interval, convenient for quick local testing only.
  # evaluation_interval is not set, so rule evaluation uses the Prometheus
  # default rather than this value.
  scrape_interval: 1s

# Resolved relative to the directory that contains this config file.
rule_files:
  - 'alert.rules.yml'

scrape_configs:
  - job_name: 'node_exporter'
    static_configs:
      # Compose service name instead of the docker0 gateway IP (172.17.0.1),
      # which is only reachable through host-published ports on a Linux host.
      - targets: ['node-exporter:9100']

alerting:
  alertmanagers:
    - static_configs:
        # Compose service name of the Alertmanager container.
        - targets: ['alert-manager:9093']
Arquivo alert.rules.yml:
# alert.rules.yml
# Example alerting rules loaded by Prometheus through `rule_files`.
groups:
  - name: example
    rules:
      # Fires when a scrape target has been unreachable for 15 seconds.
      - alert: Uptime
        expr: up == 0
        for: 15s
        labels:
          severity: critical
        annotations:
          # `instance` is attached to every scraped series; the previously
          # used `alias` label is not set by any visible scrape config and
          # would render as an empty string.
          summary: "O servidor {{ $labels.instance }} está offline!"
          description: "O servidor {{ $labels.instance }} parou de responder."

      # vector(1) always returns a sample, so this alert is permanently
      # active; useful to verify the Prometheus -> Alertmanager pipeline.
      - alert: AlwaysFiring
        expr: vector(1)
        for: 3s
        annotations:
          summary: "Test alert that always fires"

      # NOTE(review): the alert name says "greater than 5" but the expression
      # compares against 0 - confirm which threshold is intended. Also,
      # presumably `ping_request_count` comes from an application that is not
      # among the scrape targets shown here - verify.
      - alert: CountGreaterThan5
        expr: ping_request_count > 0
        for: 1s