使用 docker-compose 一键部署 Prometheus 监控系统
在日常开发和运维工作中,了解系统和服务的运行状态至关重要。Prometheus 作为当下最流行的开源监控系统之一,以其强大的指标采集能力和灵活的查询语言,在云原生时代占据了重要地位。
然而,手动部署 Prometheus 及其配套组件(如 Alertmanager、Grafana)常常需要繁琐的配置,这对初学者或需要快速搭建监控环境的用户来说并不友好。幸运的是,借助 Docker Compose,我们可以像搭积木一样轻松拉起一套完整的监控系统。
本文将带你一步步通过 Docker Compose 快速部署 Prometheus、Alertmanager 和 Grafana,实现一键式部署,适用于个人项目、测试环境或小型团队的监控需求。
官方地址信息
1
2
3
4
5
6
7
8
|
# 官方组件下载地址
https://prometheus.io/download/
# 官方文档地址
https://prometheus.io/docs/introduction/overview/
# GitHub 地址
https://github.com/prometheus/prometheus
|
持久化目录
1
2
3
4
5
6
7
8
9
10
11
12
13
14
|
# Create every host directory that the stack bind-mounts. Each path is spelled
# out explicitly (same directories, same creation order as a brace-expanded
# one-liner) so the commands do not depend on bash brace expansion.

# Prometheus: configuration (alert rules, file-based service discovery) and data
mkdir -pv \
  /data/monitor/prometheus/conf/alert_rule \
  /data/monitor/prometheus/conf/file_sd \
  /data/monitor/prometheus/data
# Alertmanager data
mkdir -pv /data/monitor/alertmanager/data
# Grafana data
mkdir -pv /data/monitor/grafana/data
# DingTalk alert plugin templates
mkdir -pv /data/monitor/dingtalk/templates
# Custom metric collection: scripts, generated metric files, logs
mkdir -pv \
  /data/monitor/defined_monitor/script \
  /data/monitor/defined_monitor/prom_file \
  /data/monitor/defined_monitor/logs
|
Prometheus 配置
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
|
cat > /data/monitor/prometheus/conf/prometheus.yaml <<'EOF'
# Global defaults applied to every scrape job unless overridden
global:
  # Scrape interval
  scrape_interval: 8s
  # Rule evaluation interval
  evaluation_interval: 15s

# Alerting rule files (path as seen inside the Prometheus container)
rule_files:
  - "/etc/prometheus/alert_rule/*.yaml"

# Alertmanager address and port
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - "alertmanager:9093"

# Scrape jobs: job name plus target address and port
scrape_configs:
  # Components of the monitoring stack itself, addressed by container hostname
  - job_name: "local_monitor"
    honor_timestamps: true
    scrape_interval: 8s
    scrape_timeout: 8s
    metrics_path: /metrics
    scheme: http
    follow_redirects: true
    static_configs:
      - targets:
          - "prometheus:9090"
          - "alertmanager:9093"
          - "cadvisor:8080"

  # Hosts listed in the file-based service discovery file
  - job_name: "Node"
    honor_timestamps: true
    scrape_interval: 8s
    scrape_timeout: 8s
    metrics_path: /metrics
    scheme: http
    follow_redirects: true
    file_sd_configs:
      - files:
          - /etc/prometheus/file_sd/node_targets.yaml
        # How often the target file is re-read
        refresh_interval: 8s
EOF
|
Alertmanager 配置
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
|
cat > /data/monitor/alertmanager/alertmanager.yaml <<'EOF'
# Alert routing (how alerts are grouped and where they are sent)
route:
  group_by: ["alertname"]
  group_wait: 30s
  group_interval: 15s
  repeat_interval: 10m  # duration value, e.g. minutes (m) or hours (h)
  receiver: "webhook"   # default receiver: the DingTalk webhook defined below

# Custom notification templates (firing and resolved messages)
templates:
  - "/etc/alertmanager/*.tmpl"

# Receivers
receivers:
  # DingTalk, reached through the dingtalk webhook container
  - name: "webhook"       # receiver name referenced by route.receiver
    webhook_configs:
      - url: "http://dingtalk:8060/dingtalk/webhook1/send"
        send_resolved: true  # also notify when an alert recovers

# Inhibition: while a critical alert is firing, mute warning alerts that carry
# the same alertname/dev/instance labels, to avoid alert storms.
# source_matchers/target_matchers replace the deprecated
# source_match/target_match fields.
inhibit_rules:
  - source_matchers:
      - 'severity="critical"'
    target_matchers:
      - 'severity="warning"'
    equal: ["alertname", "dev", "instance"]
EOF
|
钉钉报警插件配置
1
2
3
4
5
6
7
8
9
10
11
12
13
|
cat > /data/monitor/dingtalk/config.yaml <<'EOF'
# Request timeout: 5 seconds
timeout: 5s

# Custom template path (as seen inside the dingtalk container)
templates:
  - /etc/dingtalk/templates/dingtalk.tmpl

targets:
  # Receiver definition. The name "webhook1" is the path segment used in the
  # Alertmanager webhook URL (/dingtalk/webhook1/send). Both url and secret
  # come from the DingTalk robot settings; replace the masked values.
  webhook1:
    url: https://oapi.dingtalk.com/robot/send?access_token=12be608***************************************
    secret: SEC41c7677c************************************
EOF
|
url:

secret:

Compose 编排
本次使用的容器镜像均为国内镜像地址,无需额外配置镜像加速,直接启动容器组即可。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
|
cat > /data/monitor/monitor-compose.yaml <<'EOF'
# External network: it must exist before "up"
# (docker network create --driver bridge local_bridge).
networks:
  local_bridge:
    external: true

# Service list
services:
  # DingTalk alert plugin (receives Alertmanager webhooks)
  dingtalk:
    container_name: dingtalk
    image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/timonwong/prometheus-webhook-dingtalk:v2.1.0
    hostname: dingtalk
    user: "0:0"
    restart: always
    ports:
      - "8060:8060"
    command:
      - "--config.file=/etc/dingtalk/config.yaml"
    volumes:
      - /data/monitor/dingtalk/config.yaml:/etc/dingtalk/config.yaml:ro
      - /data/monitor/dingtalk/templates:/etc/dingtalk/templates:ro
      - /usr/share/zoneinfo/PRC:/etc/localtime:ro
    networks:
      - local_bridge

  # Alerting component
  alertmanager:
    container_name: alertmanager
    image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/prom/alertmanager:v0.27.0
    hostname: alertmanager
    user: "0:0"
    restart: always
    ports:
      - "9093:9093"
    command:
      - "--config.file=/etc/alertmanager/alertmanager.yaml"
      # Overriding "command" drops the image's default arguments, so the
      # storage path is set explicitly to the mounted data directory.
      - "--storage.path=/alertmanager"
    volumes:
      - /data/monitor/alertmanager:/etc/alertmanager:ro
      - /data/monitor/alertmanager/data:/alertmanager:rw
      - /usr/share/zoneinfo/PRC:/etc/localtime:ro
    depends_on:
      - dingtalk
    networks:
      - local_bridge

  # Collects metrics about Docker containers
  cadvisor:
    container_name: cadvisor
    image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/google/cadvisor:v0.33.0
    hostname: cadvisor
    user: "0:0"
    security_opt:
      - seccomp:unconfined
    restart: always
    ports:
      - "8080:8080"
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:rw
      - /sys:/sys:ro
      # NOTE(review): /data/docker/data is presumably this host's custom Docker
      # data-root; use /var/lib/docker if the daemon runs with the default.
      - /data/docker/data:/var/lib/docker:ro
      - /dev/disk/:/dev/disk:ro
    networks:
      - local_bridge

  # Core monitoring component
  prometheus:
    container_name: prometheus
    image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/prom/prometheus:v3.2.1
    hostname: prometheus
    user: "0:0"
    restart: always
    ports:
      - "9090:9090"
    command:
      - "--config.file=/etc/prometheus/prometheus.yaml"
      - "--storage.tsdb.path=/prometheus"
      - "--storage.tsdb.retention.time=90d"
      # Enables the HTTP reload endpoint (POST /-/reload)
      - "--web.enable-lifecycle"
    volumes:
      - /data/monitor/prometheus/conf:/etc/prometheus:ro
      - /data/monitor/prometheus/data:/prometheus:rw
      - /usr/share/zoneinfo/PRC:/etc/localtime:ro
    depends_on:
      - alertmanager
    networks:
      - local_bridge

  # Web UI / visualization component
  grafana:
    container_name: grafana
    image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/grafana/grafana:11.2.5
    hostname: grafana
    user: "0:0"
    restart: always
    ports:
      - "3000:3000"
    volumes:
      # Optional custom defaults.ini. Uncomment ONLY after creating the file on
      # the host: if it is missing, Docker creates a directory at that path and
      # the container fails to start (directory mounted onto a file).
      # - /data/monitor/grafana/defaults.ini:/usr/share/grafana/conf/defaults.ini:ro
      - /data/monitor/grafana/data:/var/lib/grafana:rw
      - /usr/share/zoneinfo/PRC:/etc/localtime:ro
    networks:
      - local_bridge
EOF
|
启动容器组
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
|
# Create the external bridge network; skipped when it already exists so the
# commands can be re-run safely
docker network inspect local_bridge >/dev/null 2>&1 \
  || docker network create --driver bridge local_bridge
# Start the stack
docker-compose -f /data/monitor/monitor-compose.yaml up -d
# Verify the stack: every container should be in the "Up" state
docker-compose -f /data/monitor/monitor-compose.yaml ps
# Hot-reload Prometheus after editing its configuration or rule files
curl -X POST http://localhost:9090/-/reload
# Remove the stack. Kept commented out so that pasting this whole snippet does
# not tear down the containers that were just started; run it only on purpose.
# docker-compose -f /data/monitor/monitor-compose.yaml down
|
UI 界面访问
1
2
3
4
5
6
7
8
|
## Prometheus UI 界面
http://IP:9090/
## Alertmanager UI 界面
http://IP:9093/
## Cadvisor UI 界面
http://IP:8080/containers/
|
部署节点监控导出器
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
|
# Download node_exporter and unpack it to /data/monitor/node_exporter
wget 'https://github.com/prometheus/node_exporter/releases/download/v1.8.2/node_exporter-1.8.2.linux-amd64.tar.gz' -P /data/soft/
mkdir -pv /data/monitor
tar xf /data/soft/node_exporter-*.linux-amd64.tar.gz -C /data/monitor/
mv /data/monitor/node_exporter-*.linux-amd64 /data/monitor/node_exporter
# Directory passed to --collector.textfile.directory below; create it here so
# it exists on every monitored node, not only on the Prometheus host
mkdir -pv /data/monitor/defined_monitor/prom_file
# Write the systemd unit file.
# Output goes to the journal ("syslog" is a deprecated value for
# StandardOutput/StandardError), and no Alias= is set: an alias must carry the
# same ".service" suffix as the unit, and one equal to the unit name is redundant.
cat > /etc/systemd/system/node_exporter.service <<'EOF'
[Unit]
Description=node_exporter
After=network.target

[Service]
User=root
Group=root
WorkingDirectory=/data/monitor/node_exporter
ExecStart=/data/monitor/node_exporter/node_exporter \
    --collector.textfile.directory=/data/monitor/defined_monitor/prom_file/ \
    --web.listen-address=:9100
ExecStop=/bin/kill -SIGTERM $MAINPID
StandardOutput=journal
StandardError=journal
SyslogIdentifier=node_exporter
Restart=always
RestartSec=20s

[Install]
WantedBy=multi-user.target
EOF
# Load the new unit, enable it at boot, start it now, then show its status
systemctl daemon-reload && systemctl enable --now node_exporter.service
systemctl status node_exporter.service
|
监控节点清单
1
2
3
4
5
6
7
8
9
10
11
|
## On the Prometheus host: list the node_exporter endpoints to monitor.
## This file is read by the "Node" job through file_sd_configs; the addresses
## must be nested under "targets" to form a single target group.
cat > /data/monitor/prometheus/conf/file_sd/node_targets.yaml <<'EOF'
- targets:
    - 192.168.1.31:9100
    - 192.168.1.32:9100
    - 192.168.1.33:9100
    - 192.168.1.34:9100
    - 192.168.1.35:9100
    - 192.168.1.36:9100
    - 192.168.1.37:9100
EOF
|
配置 Grafana 可视化
1
2
|
# 浏览器访问
http://IP:3000/
|

1
2
|
导入面板 ID :16098
选择默认 Prometheus 源
|

结语
到这里,我们已经顺利使用 Docker Compose 部署起了一套包含 Prometheus、Alertmanager 和 Grafana 的监控系统。整个过程不仅简洁高效,而且极具可扩展性——无论是学习实践还是为小型项目搭建监控环境,这种方式都能快速上手。
或许一开始 Prometheus 的配置文件看起来有些陌生,但随着动手实践,相信你已经掌握了它的核心逻辑。借助 Compose 的一键部署能力,你也体验到了「基础设施即代码」的便捷性。
当然,监控的世界远不止于此。你可以继续探索:
- 接入 Node Exporter 监控服务器资源
- 使用 Blackbox Exporter 做 HTTP/ICMP 探活
- 利用 Grafana 打造炫酷的可视化仪表盘
- 配置 Alertmanager 实现钉钉、企业微信告警推送
欢迎你在这个过程中不断试错、持续优化,逐步打造属于你自己的监控体系。如果你对某个方向感兴趣,别忘了关注后续内容,我们还会深入挖掘更多实用玩法!
💬 如果这篇文章对你有帮助,欢迎点赞、分享或留言交流,你的支持是我持续更新的动力!