Compare commits

...

15 Commits

15 changed files with 10552 additions and 18 deletions
+22 -10
View File
@@ -1,3 +1,11 @@
{
admin :2019
metrics {
per_host
}
}
(basic-auth) {
basic_auth {
admin $2a$14$2zQilpLka2h8Sn1mmOLAAezwDN8Zy8Ta36WECk4qt5MTn3CWksR0m
@@ -6,29 +14,33 @@
adnova.itqdev.xyz {
@healthPath path /health /health/*
handle @healthPath {
import basic-auth
reverse_proxy http://backend:8080
}
handle @healthPath {
import basic-auth
reverse_proxy http://backend:8080
}
handle_path /static/* {
reverse_proxy http://backend-staticfiles:80
}
handle_path /static/* {
reverse_proxy http://backend-staticfiles:80
}
reverse_proxy http://backend:8080
}
:8080 {
reverse_proxy http://backend:8080
}
admin.adnova.itqdev.xyz {
import basic-auth
import basic-auth
root * /var/www/admin
file_server
}
loadtest.adnova.itqdev.xyz {
import basic-auth
import basic-auth
reverse_proxy http://loadtest:5001
reverse_proxy http://loadtest:5001
}
grafana.adnova.itqdev.xyz {
+1 -1
View File
@@ -52,7 +52,7 @@
name: "Loadtest",
link: "https://loadtest.adnova.itqdev.xyz",
username: "admin",
password: "proooooood",
password: "kit2025_observability",
description: "Stress test utility with multiple profiles and ability to load mocks."
}
]
+1 -1
View File
@@ -48,7 +48,7 @@ domain = localhost
enforce_domain = false
# The full public facing url
root_url = %(protocol)s://%(domain)s:%(http_port)s/
root_url = https://grafana.adnova.itqdev.xyz
# Serve Grafana from subpath specified in `root_url` setting. By default it is set to `false` for compatibility reasons.
serve_from_sub_path = false
@@ -0,0 +1,17 @@
apiVersion: 1
contactPoints:
- orgId: 1
name: Telegram
receivers:
- uid: aet1srtyc40lca
type: telegram
settings:
bottoken: 7797967907:AAGZuUzzuS4LLb525rDNY52Awc2tvpsLjd4
chatid: "-1002555823797"
disable_notification: false
disable_web_page_preview: false
message: '{{ template "telegram.default.message" . }}'
parse_mode: Markdown
protect_content: false
disableResolveMessage: false
@@ -0,0 +1,141 @@
apiVersion: 1
groups:
- orgId: 1
name: Default
folder: Backend
interval: 10s
rules:
- uid: aet1xbx1yaupsb
title: Backend p99>500ms
condition: C
data:
- refId: A
relativeTimeRange:
from: 600
to: 0
datasourceUid: prometheus
model:
editorMode: code
expr: |
histogram_quantile(
0.99,
sum(
rate(
caddy_http_request_duration_seconds_bucket{instance="proxy:2019",handler="reverse_proxy",host="proxy:8080",job="caddy"}[$__rate_interval]
)
) by (le)
)
instant: true
intervalMs: 1000
legendFormat: __auto
maxDataPoints: 43200
range: false
refId: A
- refId: C
datasourceUid: __expr__
model:
conditions:
- evaluator:
params:
- 0.5
type: gte
operator:
type: and
query:
params:
- C
reducer:
params: []
type: last
type: query
datasource:
type: __expr__
uid: __expr__
expression: A
intervalMs: 1000
maxDataPoints: 43200
refId: C
type: threshold
dashboardUid: e3a78c36-2f34-4ad6-81d5-284002896829
panelId: 32
noDataState: OK
execErrState: Error
for: 10s
keepFiringFor: 10s
annotations:
__dashboardUid__: e3a78c36-2f34-4ad6-81d5-284002896829
__panelId__: "32"
runbook_url: https://admin.adnova.itqdev.xyz
summary: p99>500ms
isPaused: false
notification_settings:
receiver: Telegram
- orgId: 1
name: Default
folder: Postgres
interval: 10s
rules:
- uid: fet1txr4slywwe
title: "> 100 QPS on Postgresql"
condition: C
data:
- refId: A
relativeTimeRange:
from: 600
to: 0
datasourceUid: prometheus
model:
editorMode: code
expr: |
sum(
irate(pg_stat_database_xact_commit{datname="postgres",instance="postgres-exporter:9187",job="postgres"}[5m])
)
+ sum(
irate(pg_stat_database_xact_rollback{datname="postgres",instance="postgres-exporter:9187",job="postgres"}[5m])
)
instant: true
intervalMs: 1000
legendFormat: __auto
maxDataPoints: 43200
range: false
refId: A
- refId: C
datasourceUid: __expr__
model:
conditions:
- evaluator:
params:
- 100
type: gte
operator:
type: and
query:
params:
- C
reducer:
params: []
type: last
type: query
datasource:
type: __expr__
uid: __expr__
expression: A
intervalMs: 1000
maxDataPoints: 43200
refId: C
type: threshold
dashboardUid: postgres-overview
panelId: 14
noDataState: NoData
execErrState: Error
for: 10s
keepFiringFor: 1m
annotations:
__dashboardUid__: postgres-overview
__panelId__: "14"
runbook_url: https://admin.adnova.itqdev.xyz
summary: Postgresql QPS exceeded 100
isPaused: false
notification_settings:
receiver: Telegram
@@ -0,0 +1,54 @@
apiVersion: 1
templates:
- orgId: 1
name: Telegram
template: |
{{ define "telegram.default.message" }} {{ if gt (len .Alerts.Firing) 0 }}
🔥🚨 *FIRE IN THE HOLE!* 🚨🔥
We've got *{{ len .Alerts.Firing }} firing alert(s)* that need your immediate attention!
{{ range .Alerts.Firing }}
---
*Alert:* `{{ .Labels.alertname }}`
{{ if .Labels.instance }}*Instance:* `{{ .Labels.instance }}`{{ end }}
*Status:* 🔴 *FIRING* since {{ .StartsAt.Format "2006-01-02 15:04:05 MST" }}
{{ if .Annotations.summary }}*Summary:* {{ .Annotations.summary }}{{ end }}
{{ if .Annotations.description }}*Description:* {{ .Annotations.description }}{{ end }}
*Labels:*
{{ range .Labels.SortedPairs }} • `{{ .Name }}` = `{{ .Value }}`
{{ end }}
{{ if gt (len .Annotations) 0 }}*Annotations:*
{{ range .Annotations.SortedPairs }} • `{{ .Name }}` = `{{ .Value }}`
{{ end }}{{ end }}
{{ if .DashboardURL }}📊 [View Dashboard]({{ .DashboardURL }})
{{ end }}{{ if .PanelURL }}📈 [View Panel]({{ .PanelURL }})
{{ end }}{{ if .GeneratorURL }}🔗 [Alert Source]({{ .GeneratorURL }})
{{ end }}{{ if .SilenceURL }}🤫 [Silence Alert]({{ .SilenceURL }})
{{ end }}--- {{ end }} {{ end }}
{{ if gt (len .Alerts.Resolved) 0 }}
✅🟢 *ALL CLEAR!* 🟢✅
Great news! *{{ len .Alerts.Resolved }} alert(s)* have been resolved.
{{ range .Alerts.Resolved }}
---
*Alert:* `{{ .Labels.alertname }}`
{{ if .Labels.instance }}*Instance:* `{{ .Labels.instance }}`{{ end }}
*Status:* ✅ *RESOLVED* at {{ .EndsAt.Format "2006-01-02 15:04:05 MST" }} (was active since {{ .StartsAt.Format "2006-01-02 15:04:05 MST" }})
{{ if .Annotations.summary }}*Summary:* {{ .Annotations.summary }}{{ end }}
{{ if .Annotations.description }}*Description:* {{ .Annotations.description }}{{ end }}
*Labels:*
{{ range .Labels.SortedPairs }} • `{{ .Name }}` = `{{ .Value }}`
{{ end }}
{{ if gt (len .Annotations) 0 }}*Annotations:*
{{ range .Annotations.SortedPairs }} • `{{ .Name }}` = `{{ .Value }}`
{{ end }}{{ end }}
{{ if .DashboardURL }}📊 [View Dashboard]({{ .DashboardURL }})
{{ end }}{{ if .PanelURL }}📈 [View Panel]({{ .PanelURL }})
{{ end }}{{ if .GeneratorURL }}🔗 [Alert Source]({{ .GeneratorURL }}) {{ end }} {{ end }} {{ end }}
{{ if or (gt (len .Alerts.Firing) 0) (gt (len .Alerts.Resolved) 0) }}
🔔 *Grafana Alertmanager:* [View All Alerts]({{ template "__alertmanagerURL" . }}) 🔔
{{ end }}
{{ end }}
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,910 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"description": "A dashboard that monitors Django which focuses on breaking down requests by view.",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 12,
"links": [
{
"tags": ["backend"],
"targetBlank": true,
"title": "Backend Dashboards",
"type": "dashboards"
}
],
"liveNow": true,
"panels": [
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 1,
"panels": [],
"title": "Summary",
"type": "row"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red"
},
{
"color": "yellow",
"value": 0.95
},
{
"color": "green",
"value": 0.99
}
]
},
"unit": "percentunit"
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 6,
"x": 0,
"y": 1
},
"id": 2,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.0.2",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum(\n rate(\n django_http_responses_total_by_status_view_method_total{\n namespace=~\"$namespace\",\n job=~\"$job\",\n view=\"$view\",\n method=~\"$method\",\n status!~\"[4-5].*\"\n }[1w]\n )\n) /\nsum(\n rate(\n django_http_responses_total_by_status_view_method_total{\n namespace=~\"$namespace\",\n job=~\"$job\",\n view=\"$view\",\n method=~\"$method\"\n }[1w]\n )\n)\n",
"refId": "A"
}
],
"title": "Success Rate (non 4xx-5xx responses) [1w]",
"type": "stat"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "yellow",
"value": 10
},
{
"color": "red",
"value": 100
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 6,
"x": 6,
"y": 1
},
"id": 3,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.0.2",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum by (view) (\n increase(\n django_http_exceptions_total_by_view_total{\n namespace=~\"$namespace\",\n job=~\"$job\",\n view=\"$view\",\n }[1w]\n ) > 0\n)\n",
"refId": "A"
}
],
"title": "HTTP Exceptions [1w]",
"type": "stat"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "yellow",
"value": 1000
},
{
"color": "red",
"value": 2000
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 6,
"x": 12,
"y": 1
},
"id": 4,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.0.2",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "histogram_quantile(0.50,\n sum (\n rate (\n django_http_requests_latency_seconds_by_view_method_bucket {\n namespace=~\"$namespace\",\n job=~\"$job\",\n view=\"$view\",\n method=~\"$method\"\n }[$__range]\n )\n ) by (job, le)\n)\n",
"refId": "A"
}
],
"title": "Average Request Latency (P50) [1w]",
"type": "stat"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "yellow",
"value": 2500
},
{
"color": "red",
"value": 5000
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 6,
"x": 18,
"y": 1
},
"id": 5,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.0.2",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "histogram_quantile(0.95,\n sum (\n rate (\n django_http_requests_latency_seconds_by_view_method_bucket {\n namespace=~\"$namespace\",\n job=~\"$job\",\n view=\"$view\",\n method=~\"$method\"\n }[$__range]\n )\n ) by (job, le)\n)\n",
"refId": "A"
}
],
"title": "Average Request Latency (P95) [1w]",
"type": "stat"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 5
},
"id": 6,
"panels": [],
"title": "Request & Responses",
"type": "row"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 100,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
},
"unit": "reqps"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 6
},
"id": 7,
"options": {
"legend": {
"calcs": ["lastNotNull", "mean", "max"],
"displayMode": "table",
"placement": "right",
"showLegend": true,
"sortBy": "Mean",
"sortDesc": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "12.0.2",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "round(\n sum(\n rate(\n django_http_requests_total_by_view_transport_method_total{\n namespace=~\"$namespace\",\n job=~\"$job\",\n view=\"$view\"\n }[$__rate_interval]\n ) > 0\n ) by (job), 0.001\n)\n",
"legendFormat": "reqps",
"refId": "A"
}
],
"title": "Requests",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 100,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "percent"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
},
"unit": "reqps"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "2xx"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "green",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "3xx"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "blue",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "4xx"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "yellow",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "5xx"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "red",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 6
},
"id": 8,
"options": {
"legend": {
"calcs": ["lastNotNull", "mean", "max"],
"displayMode": "table",
"placement": "right",
"showLegend": true,
"sortBy": "Mean",
"sortDesc": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "12.0.2",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "round(\n sum(\n rate(\n django_http_responses_total_by_status_view_method_total{\n namespace=~\"$namespace\",\n job=~\"$job\",\n view=\"$view\",\n status=~\"2.*\",\n }[$__rate_interval]\n ) > 0\n ) by (job), 0.001\n)\n",
"legendFormat": "2xx",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "round(\n sum(\n rate(\n django_http_responses_total_by_status_view_method_total{\n namespace=~\"$namespace\",\n job=~\"$job\",\n view=\"$view\",\n status=~\"3.*\",\n }[$__rate_interval]\n ) > 0\n ) by (job), 0.001\n)\n",
"legendFormat": "3xx",
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "round(\n sum(\n rate(\n django_http_responses_total_by_status_view_method_total{\n namespace=~\"$namespace\",\n job=~\"$job\",\n view=\"$view\",\n status=~\"4.*\",\n }[$__rate_interval]\n ) > 0\n ) by (job), 0.001\n)\n",
"legendFormat": "4xx",
"refId": "C"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "round(\n sum(\n rate(\n django_http_responses_total_by_status_view_method_total{\n namespace=~\"$namespace\",\n job=~\"$job\",\n view=\"$view\",\n status=~\"5.*\",\n }[$__rate_interval]\n ) > 0\n ) by (job), 0.001\n)\n",
"legendFormat": "5xx",
"refId": "D"
}
],
"title": "Responses",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 14
},
"id": 9,
"panels": [],
"title": "Latency & Status Codes",
"type": "row"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 100,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "value"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
},
"unit": "reqps"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 15
},
"id": 10,
"options": {
"legend": {
"calcs": ["lastNotNull", "mean", "max"],
"displayMode": "table",
"placement": "right",
"showLegend": true,
"sortBy": "Mean",
"sortDesc": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "12.0.2",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "round(\n sum(\n rate(\n django_http_responses_total_by_status_view_method_total{\n namespace=~\"$namespace\",\n job=~\"$job\",\n view=\"$view\",\n method=~\"$method\",\n }[$__rate_interval]\n ) > 0\n ) by (namespace, job, view, status, method), 0.001\n)\n",
"legendFormat": "{{ view }} / {{ status }} / {{ method }}",
"refId": "A"
}
],
"title": "Responses Status Codes",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 15
},
"id": 11,
"options": {
"legend": {
"calcs": ["lastNotNull", "mean", "max"],
"displayMode": "table",
"placement": "right",
"showLegend": true,
"sortBy": "Mean",
"sortDesc": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "12.0.2",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "histogram_quantile(0.50,\n sum(\n irate(\n django_http_requests_latency_seconds_by_view_method_bucket{\n namespace=~\"$namespace\",\n job=~\"$job\",\n view=\"$view\",\n method=~\"$method\"\n }[$__rate_interval]\n ) > 0\n ) by (view, le)\n)\n",
"legendFormat": "50 - {{ view }}",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "histogram_quantile(0.95,\n sum(\n irate(\n django_http_requests_latency_seconds_by_view_method_bucket{\n namespace=~\"$namespace\",\n job=~\"$job\",\n view=\"$view\",\n method=~\"$method\"\n }[$__rate_interval]\n ) > 0\n ) by (view, le)\n)\n",
"legendFormat": "95 - {{ view }}",
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "histogram_quantile(0.99,\n sum(\n irate(\n django_http_requests_latency_seconds_by_view_method_bucket{\n namespace=~\"$namespace\",\n job=~\"$job\",\n view=\"$view\",\n method=~\"$method\"\n }[$__rate_interval]\n ) > 0\n ) by (view, le)\n)\n",
"legendFormat": "99 - {{ view }}",
"refId": "C"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "histogram_quantile(0.999,\n sum(\n irate(\n django_http_requests_latency_seconds_by_view_method_bucket{\n namespace=~\"$namespace\",\n job=~\"$job\",\n view=\"$view\",\n method=~\"$method\"\n }[$__rate_interval]\n ) > 0\n ) by (view, le)\n)\n",
"legendFormat": "99.9 - {{ view }}",
"refId": "D"
}
],
"title": "Request Latency",
"type": "timeseries"
}
],
"preload": false,
"refresh": "5s",
"schemaVersion": 41,
"tags": ["backend"],
"templating": {
"list": [
{
"label": "Data source",
"name": "datasource",
"query": "prometheus",
"refresh": 1,
"type": "datasource"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"includeAll": false,
"label": "Namespace",
"name": "namespace",
"query": "label_values(django_http_responses_total_by_status_view_method_total{}, namespace)",
"refresh": 2,
"sort": 1,
"type": "query"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"includeAll": false,
"label": "Job",
"name": "job",
"query": "label_values(django_http_responses_total_by_status_view_method_total{namespace=~\"$namespace\"}, job)",
"refresh": 2,
"sort": 1,
"type": "query"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"includeAll": false,
"label": "View",
"name": "view",
"query": "label_values(django_http_responses_total_by_status_view_method_total{namespace=~\"$namespace\", job=~\"$job\", view!~\"<unnamed view>|health_check:health_check_home|prometheus-django-metrics\"}, view)",
"refresh": 2,
"sort": 1,
"type": "query"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"includeAll": true,
"label": "Method",
"multi": true,
"name": "method",
"query": "label_values(django_http_responses_total_by_status_view_method_total{namespace=~\"$namespace\", job=~\"$job\", view=~\"$view\"}, method)",
"refresh": 2,
"sort": 1,
"type": "query"
}
]
},
"time": {
"from": "now-30m",
"to": "now"
},
"timepicker": {},
"timezone": "browser",
"title": "Requests / By View",
"uid": "backend-requests-by-view",
"version": 2,
"weekStart": "monday"
}
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,768 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"description": "A dashboard that monitors Celery.",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 2,
"links": [
{
"tags": ["celery"],
"targetBlank": true,
"title": "Celery Dashboards",
"type": "dashboards"
}
],
"liveNow": true,
"panels": [
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 1,
"panels": [],
"title": "Tasks",
"type": "row"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"inspect": false
},
"mappings": [],
"noValue": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Success Rate"
},
"properties": [
{
"id": "unit",
"value": "percentunit"
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 16,
"x": 0,
"y": 1
},
"id": 2,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"enablePagination": true,
"fields": "",
"reducer": ["sum"],
"show": false
},
"showHeader": true,
"sortBy": [
{
"desc": true,
"displayName": "Succeeded"
}
]
},
"pluginVersion": "12.0.2",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum (\n round(\n increase(\n celery_task_succeeded_total{\n job=\"$job\",\n name=~\"$task\",\n queue_name=~\"$queue_name\"\n }[$__range]\n )\n )\n) by (name)\n/(sum (\n round(\n increase(\n celery_task_succeeded_total{\n job=\"$job\",\n name=~\"$task\",\n queue_name=~\"$queue_name\"\n }[$__range]\n )\n )\n) by (name)\n+sum (\n round(\n increase(\n celery_task_failed_total{\n job=\"$job\",\n name=~\"$task\",\n queue_name=~\"$queue_name\"\n }[$__range]\n )\n )\n) by (name)\n) > -1\n",
"format": "table",
"instant": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum (\n round(\n increase(\n celery_task_succeeded_total{\n job=\"$job\",\n name=~\"$task\",\n queue_name=~\"$queue_name\"\n }[$__range]\n )\n )\n) by (name) > 0\n",
"format": "table",
"instant": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum (\n round(\n increase(\n celery_task_failed_total{\n job=\"$job\",\n name=~\"$task\",\n queue_name=~\"$queue_name\"\n }[$__range]\n )\n )\n) by (name) > 0\n",
"format": "table",
"instant": true,
"refId": "C"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum (\n round(\n increase(\n celery_task_sent_total{\n job=\"$job\",\n name=~\"$task\",\n queue_name=~\"$queue_name\"\n }[$__range]\n )\n )\n) by (name) > 0\n",
"format": "table",
"instant": true,
"refId": "D"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum (\n round(\n increase(\n celery_task_received_total{\n job=\"$job\",\n name=~\"$task\",\n queue_name=~\"$queue_name\"\n }[$__range]\n )\n )\n) by (name) > 0\n",
"format": "table",
"instant": true,
"refId": "E"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum (\n round(\n increase(\n celery_task_rejected_total{\n job=\"$job\",\n name=~\"$task\",\n queue_name=~\"$queue_name\"\n }[$__range]\n )\n )\n) by (name) > 0\n",
"format": "table",
"instant": true,
"refId": "F"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum (\n round(\n increase(\n celery_task_retried_total{\n job=\"$job\",\n name=~\"$task\",\n queue_name=~\"$queue_name\"\n }[$__range]\n )\n )\n) by (name) > 0\n",
"format": "table",
"instant": true,
"refId": "G"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum (\n round(\n increase(\n celery_task_revoked_total{\n job=\"$job\",\n name=~\"$task\",\n queue_name=~\"$queue_name\"\n }[$__range]\n )\n )\n) by (name) > 0\n",
"format": "table",
"instant": true,
"refId": "H"
}
],
"title": "Task Stats",
"transformations": [
{
"id": "merge"
},
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true
},
"indexByName": {
"Value #A": 1,
"Value #B": 2,
"Value #C": 3,
"Value #D": 4,
"Value #E": 5,
"Value #F": 6,
"Value #G": 7,
"Value #H": 8,
"name": 0
},
"renameByName": {
"Value #A": "Success Rate",
"Value #B": "Succeeded",
"Value #C": "Failed",
"Value #D": "Sent",
"Value #E": "Received",
"Value #F": "Rejected",
"Value #G": "Retried",
"Value #H": "Revoked",
"name": "Name"
}
}
}
],
"type": "table"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 16,
"y": 1
},
"id": 3,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"enablePagination": true,
"fields": "",
"reducer": ["sum"],
"show": false
},
"showHeader": true,
"sortBy": [
{
"desc": true,
"displayName": "Value"
}
]
},
"pluginVersion": "12.0.2",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "round(\n sum (\n increase(\n celery_task_failed_total{\n job=\"$job\",\n name=~\"$task\",\n queue_name=~\"$queue_name\"\n }[$__range]\n )\n ) by (name, exception) > 0\n)\n",
"format": "table",
"instant": true,
"refId": "A"
}
],
"title": "Task Exceptions",
"transformations": [
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true,
"job": true
},
"indexByName": {
"Value": 2,
"exception": 1,
"name": 0
},
"renameByName": {
"exception": "Exception",
"name": "Task"
}
}
}
],
"type": "table"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 9
},
"id": 4,
"options": {
"legend": {
"calcs": ["mean", "max"],
"displayMode": "table",
"placement": "right",
"showLegend": true,
"sortBy": "Mean",
"sortDesc": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "12.0.2",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum (\n round(\n increase(\n celery_task_succeeded_total{\n job=\"$job\",\n name=~\"$task\",\n queue_name=~\"$queue_name\"\n }[$__rate_interval]\n )\n )\n) by (name) > 0\n",
"legendFormat": "Succeeded - {{ name }}",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum (\n round(\n increase(\n celery_task_failed_total{\n job=\"$job\",\n name=~\"$task\",\n queue_name=~\"$queue_name\"\n }[$__rate_interval]\n )\n )\n) by (name) > 0\n",
"legendFormat": "Failed - {{ name }}",
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum (\n round(\n increase(\n celery_task_sent_total{\n job=\"$job\",\n name=~\"$task\",\n queue_name=~\"$queue_name\"\n }[$__rate_interval]\n )\n )\n) by (name) > 0\n",
"legendFormat": "Sent - {{ name }}",
"refId": "C"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum (\n round(\n increase(\n celery_task_received_total{\n job=\"$job\",\n name=~\"$task\",\n queue_name=~\"$queue_name\"\n }[$__rate_interval]\n )\n )\n) by (name) > 0\n",
"legendFormat": "Received - {{ name }}",
"refId": "D"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum (\n round(\n increase(\n celery_task_retried_total{\n job=\"$job\",\n name=~\"$task\",\n queue_name=~\"$queue_name\"\n }[$__rate_interval]\n )\n )\n) by (name) > 0\n",
"legendFormat": "Retried - {{ name }}",
"refId": "E"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum (\n round(\n increase(\n celery_task_revoked_total{\n job=\"$job\",\n name=~\"$task\",\n queue_name=~\"$queue_name\"\n }[$__rate_interval]\n )\n )\n) by (name) > 0\n",
"legendFormat": "Revoked - {{ name }}",
"refId": "F"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum (\n round(\n increase(\n celery_task_rejected_total{\n job=\"$job\",\n name=~\"$task\",\n queue_name=~\"$queue_name\"\n }[$__rate_interval]\n )\n )\n) by (name) > 0\n",
"legendFormat": "Rejected - {{ name }}",
"refId": "G"
}
],
"title": "Tasks Completed",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 17
},
"id": 5,
"options": {
"legend": {
"calcs": ["mean", "max"],
"displayMode": "table",
"placement": "right",
"showLegend": true,
"sortBy": "Mean",
"sortDesc": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "12.0.2",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum (\n round(\n increase(\n celery_task_failed_total{\n job=\"$job\",\n name=~\"$task\",\n queue_name=~\"$queue_name\"\n }[$__rate_interval]\n )\n )\n) by (name, exception) > 0\n",
"legendFormat": "{{ name }}/{{ exception }}",
"refId": "A"
}
],
"title": "Task Exceptions",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"custom": {
"spanNulls": false
},
"unit": "s"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "P50"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "green",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "P95"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "yellow",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "P99"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "red",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 25
},
"id": 6,
"options": {
"legend": {
"calcs": ["mean", "max"],
"displayMode": "table",
"placement": "right",
"showLegend": true,
"sortBy": "Mean",
"sortDesc": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "histogram_quantile(0.50,\n sum(\n irate(\n celery_task_runtime_bucket{\n job=\"$job\",\n name=~\"$task\",\n queue_name=~\"$queue_name\"\n }[$__rate_interval]\n ) > 0\n ) by (name, job, le)\n)\n",
"legendFormat": "P50 - {{ name }}",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "histogram_quantile(0.95,\n sum(\n irate(\n celery_task_runtime_bucket{\n job=\"$job\",\n name=~\"$task\",\n queue_name=~\"$queue_name\"\n }[$__rate_interval]\n ) > 0\n ) by (name, job, le)\n)\n",
"legendFormat": "P95 - {{ name }}",
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "histogram_quantile(0.99,\n sum(\n irate(\n celery_task_runtime_bucket{\n job=\"$job\",\n name=~\"$task\",\n queue_name=~\"$queue_name\"\n }[$__rate_interval]\n ) > 0\n ) by (name, job, le)\n)\n",
"legendFormat": "P99 - {{ name }}",
"refId": "C"
}
],
"title": "Tasks Runtime",
"type": "timeseries"
}
],
"preload": false,
"refresh": "10s",
"schemaVersion": 41,
"tags": ["celery"],
"templating": {
"list": [
{
"label": "Data source",
"name": "datasource",
"query": "prometheus",
"refresh": 1,
"type": "datasource"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"includeAll": false,
"label": "Namespace",
"name": "namespace",
"query": "label_values(celery_worker_up{}, namespace)",
"refresh": 2,
"sort": 1,
"type": "query"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"includeAll": false,
"label": "Job",
"name": "job",
"query": "label_values(celery_worker_up{namespace=\"$namespace\"}, job)",
"refresh": 2,
"sort": 1,
"type": "query"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"includeAll": false,
"label": "Queue Name",
"name": "queue_name",
"query": "label_values(celery_task_received_total{namespace=\"$namespace\", job=\"$job\", name!~\"None\"}, queue_name)",
"refresh": 2,
"sort": 1,
"type": "query"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"includeAll": false,
"label": "Task",
"multi": true,
"name": "task",
"query": "label_values(celery_task_received_total{namespace=\"$namespace\", job=\"$job\", queue_name=~\"$queue_name\", name!~\"None\"}, name)",
"refresh": 2,
"sort": 1,
"type": "query"
}
]
},
"time": {
"from": "now-24h",
"to": "now"
},
"timepicker": {},
"timezone": "browser",
"title": "By Task",
"uid": "celery-tasks-by-task",
"version": 1,
"weekStart": "monday"
}
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+13 -6
View File
@@ -1,7 +1,7 @@
# Global config
global:
scrape_interval: 10s
scrape_timeout: 10s
scrape_interval: 5s
scrape_timeout: 5s
evaluation_interval: 10m
external_labels:
environment: local
@@ -10,24 +10,31 @@ global:
scrape_configs:
# Prometheus
- job_name: prometheus
scrape_interval: 5s
static_configs:
- targets: ["localhost:9090"]
# Postgres
- job_name: postgres
scrape_interval: 10s
static_configs:
- targets: ["postgres-exporter:9187"]
# Redis
- job_name: redis
scrape_interval: 10s
static_configs:
- targets: ["redis-exporter:9121"]
# Celery
- job_name: celery
scrape_interval: 30s
scrape_interval: 15s
static_configs:
- targets: ["celery-exporter:9808"]
# Backend
- job_name: backend
static_configs:
- targets: ["backend:8080"]
# Caddy
- job_name: caddy
static_configs:
- targets: ["proxy:2019"]