feat(grafana): added alerts configuration provisioning

This commit is contained in:
ITQ
2025-07-26 05:34:35 +03:00
parent 0db6ed576a
commit 7127227350
3 changed files with 212 additions and 0 deletions
@@ -0,0 +1,17 @@
# Grafana alerting provisioning: contact points.
# Declares a single contact point named "Telegram" with one Telegram receiver.
apiVersion: 1
contactPoints:
  - orgId: 1
    name: Telegram
    receivers:
      - uid: aet1srtyc40lca
        type: telegram
        settings:
          # Secret: resolved from the TELEGRAM_BOT_TOKEN environment variable
          # when Grafana loads this file. Never commit the literal token.
          # NOTE(review): the token previously committed here must be treated
          # as compromised and rotated.
          bottoken: '${TELEGRAM_BOT_TOKEN}'
          # Quoted so the negative numeric chat id stays a string.
          chatid: "-1002555823797"
          disable_notification: false
          disable_web_page_preview: false
          # Body is rendered by the "telegram.default.message" template.
          message: '{{ template "telegram.default.message" . }}'
          parse_mode: Markdown
          protect_content: false
        disableResolveMessage: false
@@ -0,0 +1,141 @@
# Grafana alerting provisioning: alert rule groups.
# Two groups, both named "Default", one per folder (Backend, Postgres).
# Each group is evaluated every 10s and notifies the "Telegram" receiver.
apiVersion: 1
groups:
  - orgId: 1
    name: Default
    folder: Backend
    interval: 10s
    rules:
      # Backend latency: condition C is met when query A (p99 of the Caddy
      # reverse_proxy request duration, in seconds) is >= 0.5.
      - uid: aet1xbx1yaupsb
        title: Backend p99>500ms
        condition: C
        data:
          # A: instant Prometheus query over the last 600 seconds.
          - refId: A
            relativeTimeRange:
              from: 600
              to: 0
            datasourceUid: prometheus
            model:
              editorMode: code
              expr: |
                histogram_quantile(
                  0.99,
                  sum(
                    rate(
                      caddy_http_request_duration_seconds_bucket{instance="proxy:2019",handler="reverse_proxy",host="proxy:8080",job="caddy"}[$__rate_interval]
                    )
                  ) by (le)
                )
              instant: true
              intervalMs: 1000
              legendFormat: __auto
              maxDataPoints: 43200
              range: false
              refId: A
          # C: threshold expression applied to A (gte 0.5).
          - refId: C
            datasourceUid: __expr__
            model:
              conditions:
                - evaluator:
                    params:
                      - 0.5
                    type: gte
                  operator:
                    type: and
                  query:
                    params:
                      - C
                  reducer:
                    params: []
                    type: last
                  type: query
              datasource:
                type: __expr__
                uid: __expr__
              expression: A
              intervalMs: 1000
              maxDataPoints: 43200
              refId: C
              type: threshold
        dashboardUid: e3a78c36-2f34-4ad6-81d5-284002896829
        panelId: 32
        noDataState: NoData
        execErrState: Error
        # Pending period before firing, and hold time after recovery.
        for: 10s
        keepFiringFor: 10s
        annotations:
          __dashboardUid__: e3a78c36-2f34-4ad6-81d5-284002896829
          __panelId__: "32"
          runbook_url: https://admin.adnova.itqdev.xyz
          summary: p99>500ms
        isPaused: false
        notification_settings:
          receiver: Telegram
  - orgId: 1
    name: Default
    folder: Postgres
    interval: 10s
    rules:
      # Postgres throughput: condition C is met when query A (commit rate plus
      # rollback rate for database "postgres") is >= 100.
      - uid: fet1txr4slywwe
        # Quoted because a plain scalar must not start with ">".
        title: "> 100 QPS on Postgresql"
        condition: C
        data:
          # A: instant Prometheus query over the last 600 seconds.
          - refId: A
            relativeTimeRange:
              from: 600
              to: 0
            datasourceUid: prometheus
            model:
              editorMode: code
              expr: |
                sum(
                  irate(pg_stat_database_xact_commit{datname="postgres",instance="postgres-exporter:9187",job="postgres"}[5m])
                )
                + sum(
                  irate(pg_stat_database_xact_rollback{datname="postgres",instance="postgres-exporter:9187",job="postgres"}[5m])
                )
              instant: true
              intervalMs: 1000
              legendFormat: __auto
              maxDataPoints: 43200
              range: false
              refId: A
          # C: threshold expression applied to A (gte 100).
          - refId: C
            datasourceUid: __expr__
            model:
              conditions:
                - evaluator:
                    params:
                      - 100
                    type: gte
                  operator:
                    type: and
                  query:
                    params:
                      - C
                  reducer:
                    params: []
                    type: last
                  type: query
              datasource:
                type: __expr__
                uid: __expr__
              expression: A
              intervalMs: 1000
              maxDataPoints: 43200
              refId: C
              type: threshold
        dashboardUid: postgres-overview
        panelId: 14
        noDataState: NoData
        execErrState: Error
        # Pending period before firing, and hold time after recovery.
        for: 10s
        keepFiringFor: 1m
        annotations:
          __dashboardUid__: postgres-overview
          __panelId__: "14"
          runbook_url: https://admin.adnova.itqdev.xyz
          summary: Postgresql QPS exceeded 100
        isPaused: false
        notification_settings:
          receiver: Telegram
@@ -0,0 +1,54 @@
# Grafana alerting provisioning: notification templates.
# Defines the Go template "telegram.default.message", which renders:
#   1. a section listing every firing alert (labels, annotations, links),
#   2. a section listing every resolved alert,
#   3. a footer link, emitted only when at least one alert is present.
# The template body is a literal block scalar, so every line below
# "template: |" is template text and must stay indented under that key.
apiVersion: 1
templates:
  - orgId: 1
    name: Telegram
    template: |
      {{ define "telegram.default.message" }} {{ if gt (len .Alerts.Firing) 0 }}
      🔥🚨 *FIRE IN THE HOLE!* 🚨🔥
      We've got *{{ len .Alerts.Firing }} firing alert(s)* that need your immediate attention!
      {{ range .Alerts.Firing }}
      ---
      *Alert:* `{{ .Labels.alertname }}`
      {{ if .Labels.instance }}*Instance:* `{{ .Labels.instance }}`{{ end }}
      *Status:* 🔴 *FIRING* since {{ .StartsAt.Format "2006-01-02 15:04:05 MST" }}
      {{ if .Annotations.summary }}*Summary:* {{ .Annotations.summary }}{{ end }}
      {{ if .Annotations.description }}*Description:* {{ .Annotations.description }}{{ end }}
      *Labels:*
      {{ range .Labels.SortedPairs }} • `{{ .Name }}` = `{{ .Value }}`
      {{ end }}
      {{ if gt (len .Annotations) 0 }}*Annotations:*
      {{ range .Annotations.SortedPairs }} • `{{ .Name }}` = `{{ .Value }}`
      {{ end }}{{ end }}
      {{ if .DashboardURL }}📊 [View Dashboard]({{ .DashboardURL }})
      {{ end }}{{ if .PanelURL }}📈 [View Panel]({{ .PanelURL }})
      {{ end }}{{ if .GeneratorURL }}🔗 [Alert Source]({{ .GeneratorURL }})
      {{ end }}{{ if .SilenceURL }}🤫 [Silence Alert]({{ .SilenceURL }})
      {{ end }}--- {{ end }} {{ end }}
      {{ if gt (len .Alerts.Resolved) 0 }}
      ✅🟢 *ALL CLEAR!* 🟢✅
      Great news! *{{ len .Alerts.Resolved }} alert(s)* have been resolved.
      {{ range .Alerts.Resolved }}
      ---
      *Alert:* `{{ .Labels.alertname }}`
      {{ if .Labels.instance }}*Instance:* `{{ .Labels.instance }}`{{ end }}
      *Status:* ✅ *RESOLVED* at {{ .EndsAt.Format "2006-01-02 15:04:05 MST" }} (was active since {{ .StartsAt.Format "2006-01-02 15:04:05 MST" }})
      {{ if .Annotations.summary }}*Summary:* {{ .Annotations.summary }}{{ end }}
      {{ if .Annotations.description }}*Description:* {{ .Annotations.description }}{{ end }}
      *Labels:*
      {{ range .Labels.SortedPairs }} • `{{ .Name }}` = `{{ .Value }}`
      {{ end }}
      {{ if gt (len .Annotations) 0 }}*Annotations:*
      {{ range .Annotations.SortedPairs }} • `{{ .Name }}` = `{{ .Value }}`
      {{ end }}{{ end }}
      {{ if .DashboardURL }}📊 [View Dashboard]({{ .DashboardURL }})
      {{ end }}{{ if .PanelURL }}📈 [View Panel]({{ .PanelURL }})
      {{ end }}{{ if .GeneratorURL }}🔗 [Alert Source]({{ .GeneratorURL }}) {{ end }} {{ end }} {{ end }}
      {{ if or (gt (len .Alerts.Firing) 0) (gt (len .Alerts.Resolved) 0) }}
      🔔 *Grafana Alertmanager:* [View All Alerts]({{ template "__alertmanagerURL" . }}) 🔔
      {{ end }}
      {{ end }}