feat(loadtest): added loadtesting with k6

This commit is contained in:
ITQ
2026-02-24 19:47:49 +03:00
parent ade94d35fd
commit f56a26836d
7 changed files with 618 additions and 3 deletions
+3
View File
@@ -27,3 +27,6 @@ Temporary Items
# Env files
.env
# Generated artifacts
artifacts/
+2 -2
View File
@@ -65,6 +65,6 @@
## 6. Наблюдаемость и эксплуатация
- Health/readiness endpoints: [src/backend/api/urls.py](./src/backend/api/urls.py)
- Prometheus middleware/logging config: [src/backend/config/settings.py](./src/backend/config/settings.py)
- Structured logs: [src/backend/config/settings.py](./src/backend/config/settings.py)
- Prometheus middleware/logging config: [src/backend/config/settings/base.py](./src/backend/config/settings/base.py)
- Structured logs: [src/backend/config/settings/base.py](./src/backend/config/settings/base.py)
- CI/CD config: [.gitlab-ci.yml](./.gitlab-ci.yml)
+9 -1
View File
@@ -81,7 +81,7 @@ Please note that by default containers will use ports 80 (reverse proxy) and ran
#### 1. Configuration
- Docker compose configuration files are stored in [deploy/compose](./deploy/compose).
- Configuration files for containers are stored in [infrastrucutre/configs](./infrastrucutre/configs).
- Configuration files for containers are stored in [infrastructure/configs](./infrastructure/configs).
Env could be customized by creating `.env` file in each service config directory, it will automatically override the default values from `.env.template`.
- Ports on which containers will be accessible are defined in [.env.template](./.env.template). They can be customized by creating a `.env` file in the root directory and overriding the following lines for the compose file you are running:
@@ -133,3 +133,11 @@ Example run:
![metrics](./assets/images/metrics.png)
System metrics (gc, requests, etc.) and several business metrics (`lotty_decide_requests_total`, `lotty_events_ingested_total`).
## Load testing (k6)
Reproducible k6 profile for `POST /api/v1/decide`:
- scenario script: [infrastructure/k6/decide.js](./infrastructure/k6/decide.js)
- runner: [infrastructure/k6/run-decide.sh](./infrastructure/k6/run-decide.sh)
- guide: [infrastructure/k6/README.md](./infrastructure/k6/README.md)
+83
View File
@@ -0,0 +1,83 @@
# k6 Load Testing
Reproducible load test profile for `POST /api/v1/decide`.
## Prerequisites
- Docker + Docker Compose
- `jq`
- Running stack (`docker compose -f compose.yaml up -d`)
## One-command run
```bash
./infrastructure/k6/run-decide.sh
```
This command:
1. Prepares deterministic fixture via `prepare_k6_fixture`.
2. Runs `grafana/k6` in a pinned container image.
3. Saves artifacts to `artifacts/k6/<RUN_ID>/`.
Artifacts:
- `fixture.json`
- `run.env`
- `summary.json`
## Reproducible rerun
Use the same `RUN_ID` and k6 profile parameters.
```bash
RUN_ID=baseline_20260224 \
START_RPS=20 \
RAMP_UP_RPS=200 \
HOLD_RPS=200 \
HOLD_DURATION=2m \
./infrastructure/k6/run-decide.sh
```
## Target URL
Default target for k6 container:
- `K6_BASE_URL=http://host.docker.internal`
Override if needed:
```bash
K6_BASE_URL=http://host.docker.internal:14609 ./infrastructure/k6/run-decide.sh
```
## Profile knobs
- `START_RPS`
- `RAMP_UP_RPS`
- `HOLD_RPS`
- `RAMP_UP_DURATION`
- `HOLD_DURATION`
- `RAMP_DOWN_DURATION`
- `PRE_ALLOCATED_VUS`
- `MAX_VUS`
- `THRESHOLD_ERROR_RATE`
- `THRESHOLD_P95_MS`
- `THRESHOLD_P99_MS`
- `K6_IMAGE`
## Compare two runs
```bash
BASE=artifacts/k6/baseline_20260224/summary.json
CAND=artifacts/k6/candidate_20260224/summary.json
# --slurpfile (the replacement for the deprecated --argfile) wraps each file
# in an array, hence the [0] index.
jq -n --slurpfile b "$BASE" --slurpfile c "$CAND" '{
  baseline_p95_ms: $b[0].metrics.http_req_duration["p(95)"],
  candidate_p95_ms: $c[0].metrics.http_req_duration["p(95)"],
  baseline_req_per_s: $b[0].metrics.http_reqs.rate,
  candidate_req_per_s: $c[0].metrics.http_reqs.rate,
  baseline_error_rate: $b[0].metrics.http_req_failed.value,
  candidate_error_rate: $c[0].metrics.http_req_failed.value
}'
```
+121
View File
@@ -0,0 +1,121 @@
import http from "k6/http";
import { check, sleep } from "k6";
import { Counter, Rate, Trend } from "k6/metrics";
/**
 * Reads a numeric setting from the k6 environment (`__ENV`).
 *
 * An unset or empty variable falls back to `fallback`. Anything that does not
 * parse to a finite number, is below `min`, or (when `integerOnly` is set) is
 * not an integer aborts the run during init instead of silently producing
 * `NaN` later on.
 *
 * @param name        environment variable name
 * @param fallback    default value (string form) used when the variable is unset/empty
 * @param min         smallest accepted value (inclusive)
 * @param integerOnly when true, fractional values are rejected
 * @returns the parsed number
 * @throws Error when the value is not an acceptable number
 */
function envNumber(name, fallback, min, integerOnly) {
  const raw = __ENV[name] || fallback;
  const value = Number(raw);
  const valid =
    Number.isFinite(value) &&
    value >= min &&
    (!integerOnly || Number.isInteger(value));
  if (!valid) {
    throw new Error(
      `${name} must be ${integerOnly ? "an integer" : "a number"} >= ${min}, got "${raw}"`,
    );
  }
  return value;
}

// Target host; a single trailing slash is dropped so URLs can be joined with "/".
const BASE_URL = (__ENV.BASE_URL || "http://host.docker.internal").replace(
  /\/$/,
  "",
);
const API_URL = `${BASE_URL}/api/v1`;

// Request payload settings.
const FLAG_KEY = __ENV.FLAG_KEY || "";
const SUBJECT_PREFIX = __ENV.SUBJECT_PREFIX || "k6_subject";
const SUBJECT_COUNTRY = __ENV.SUBJECT_COUNTRY || "US";
// Used as a modulus when building subject ids, so it must be a positive integer.
const SUBJECT_POOL = envNumber("SUBJECT_POOL", "20000", 1, true);
const THINK_TIME_SECONDS = envNumber("THINK_TIME_SECONDS", "0", 0, false);

// Arrival-rate profile (iterations per second) and VU budget.
const START_RATE = envNumber("START_RPS", "20", 0, false);
const RAMP_UP_RATE = envNumber("RAMP_UP_RPS", "200", 0, false);
const HOLD_RATE = envNumber("HOLD_RPS", "200", 0, false);
const PRE_ALLOCATED_VUS = envNumber("PRE_ALLOCATED_VUS", "100", 0, false);
const MAX_VUS = envNumber("MAX_VUS", "600", 0, false);

// Stage durations are passed to k6 as duration strings (e.g. "30s", "2m").
const RAMP_UP_DURATION = __ENV.RAMP_UP_DURATION || "30s";
const HOLD_DURATION = __ENV.HOLD_DURATION || "2m";
const RAMP_DOWN_DURATION = __ENV.RAMP_DOWN_DURATION || "20s";

// Threshold values stay strings: they are interpolated into threshold expressions.
const THRESHOLD_ERROR_RATE = __ENV.THRESHOLD_ERROR_RATE || "0.01";
const THRESHOLD_P95_MS = __ENV.THRESHOLD_P95_MS || "250";
const THRESHOLD_P99_MS = __ENV.THRESHOLD_P99_MS || "500";

// The scenario cannot build a request without a flag key, so fail during init.
if (!FLAG_KEY) {
  throw new Error("FLAG_KEY is required");
}
// Load shape: ramp to RAMP_UP_RATE, move to/hold HOLD_RATE, then ramp down to zero.
const decideStages = [
  { duration: RAMP_UP_DURATION, target: RAMP_UP_RATE },
  { duration: HOLD_DURATION, target: HOLD_RATE },
  { duration: RAMP_DOWN_DURATION, target: 0 },
];

// Pass/fail criteria. The custom 200-rate threshold is fixed at 0.99 and is
// independent of THRESHOLD_ERROR_RATE.
const decideThresholds = {
  http_req_failed: ["rate<" + THRESHOLD_ERROR_RATE],
  http_req_duration: [
    "p(95)<" + THRESHOLD_P95_MS,
    "p(99)<" + THRESHOLD_P99_MS,
  ],
  decide_status_200_rate: ["rate>0.99"],
};

/**
 * k6 options: a single `ramping-arrival-rate` scenario (`decide_hot_path`),
 * the thresholds above and the percentile columns written to the summary.
 */
export const options = {
  scenarios: {
    decide_hot_path: {
      executor: "ramping-arrival-rate",
      timeUnit: "1s",
      startRate: START_RATE,
      preAllocatedVUs: PRE_ALLOCATED_VUS,
      maxVUs: MAX_VUS,
      stages: decideStages,
    },
  },
  thresholds: decideThresholds,
  summaryTrendStats: ["avg", "min", "med", "p(90)", "p(95)", "p(99)", "max"],
};
// Custom metrics recorded by the default function on every iteration.
// Share of responses with HTTP status 200 (has a threshold in `options`).
const decideStatus200Rate = new Rate("decide_status_200_rate");
// Share of responses whose first decision has reason "experiment_assigned".
const decideAssignedRate = new Rate("decide_experiment_assigned_rate");
// Number of decide requests issued.
const decideRequests = new Counter("decide_requests_total");
// Request duration samples; the `true` flag is k6's `isTime` argument of `Trend`.
const decideDuration = new Trend("decide_request_duration_ms", true);
/**
 * Builds the subject id for the current iteration.
 *
 * The k6 iteration and VU counters are mixed with two fixed multipliers and
 * reduced modulo SUBJECT_POOL, giving ids in `1..SUBJECT_POOL` that depend
 * only on (`__ITER`, `__VU`).
 *
 * @returns `<SUBJECT_PREFIX>_<n>`
 */
function buildSubjectId() {
  const mixed = __ITER * 104729 + __VU * 8191;
  const slot = (mixed % SUBJECT_POOL) + 1;
  return [SUBJECT_PREFIX, slot].join("_");
}
/**
 * Serializes the JSON body of one decide request: a generated subject id,
 * the configured country attribute and the single flag under test.
 *
 * @returns the request body as a JSON string
 */
function buildPayload() {
  const request = {
    subject_id: buildSubjectId(),
    subject_attributes: { country: SUBJECT_COUNTRY },
    flags: [FLAG_KEY],
  };
  return JSON.stringify(request);
}
/**
 * Parses a k6 response body as a JSON object without throwing.
 *
 * `Response.json()` raises when the body is empty or not valid JSON (for
 * example an HTML error page from a proxy); such responses yield `null`.
 *
 * @param response k6 HTTP response
 * @returns the parsed object/array, or `null` when the body is not a JSON object
 */
function parseDecideBody(response) {
  try {
    const body = response.json();
    return body !== null && typeof body === "object" ? body : null;
  } catch (err) {
    return null;
  }
}

/**
 * One load-test iteration: POST a decide request, record the custom metrics
 * and run the response checks, then optionally sleep for the think time.
 */
export default function () {
  const response = http.post(`${API_URL}/decide`, buildPayload(), {
    headers: { "Content-Type": "application/json" },
    tags: { endpoint: "decide" },
  });

  const isOk = response.status === 200;
  decideRequests.add(1);
  decideDuration.add(response.timings.duration);
  decideStatus200Rate.add(isOk);

  // Parse once, guarded, so an unparseable body fails the check instead of
  // aborting the iteration with an exception.
  const body = parseDecideBody(response);
  const decisions =
    body !== null && Array.isArray(body.decisions) ? body.decisions : null;

  // The assignment reason is only taken from successful responses.
  let reason = "";
  if (isOk && decisions !== null && decisions.length > 0 && decisions[0]) {
    reason = String(decisions[0].reason || "");
  }
  decideAssignedRate.add(reason === "experiment_assigned");

  check(response, {
    "status is 200": (r) => r.status === 200,
    "has one decision": () => decisions !== null && decisions.length === 1,
  });

  if (THINK_TIME_SECONDS > 0) {
    sleep(THINK_TIME_SECONDS);
  }
}
+113
View File
@@ -0,0 +1,113 @@
#!/usr/bin/env bash
# Runner settings for the k6 decide load test. Every value below can be
# overridden from the environment; unset or empty variables get the default.
set -euo pipefail

# Repository root: two levels above the directory containing this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"

# Default run id is the current UTC timestamp.
if [[ -z "${RUN_ID:-}" ]]; then
  RUN_ID="$(date -u +%Y%m%d%H%M%S)"
fi

# k6 container image and target URL.
: "${K6_IMAGE:=grafana/k6:0.50.0}"
: "${K6_BASE_URL:=http://host.docker.internal}"

# Arrival-rate profile and VU budget.
: "${START_RPS:=20}"
: "${RAMP_UP_RPS:=200}"
: "${HOLD_RPS:=200}"
: "${PRE_ALLOCATED_VUS:=100}"
: "${MAX_VUS:=600}"

# Stage durations.
: "${RAMP_UP_DURATION:=30s}"
: "${HOLD_DURATION:=2m}"
: "${RAMP_DOWN_DURATION:=20s}"

# Pass/fail thresholds.
: "${THRESHOLD_ERROR_RATE:=0.01}"
: "${THRESHOLD_P95_MS:=250}"
: "${THRESHOLD_P99_MS:=500}"

# Artifacts of this run are collected here.
: "${RESULTS_DIR:=$ROOT_DIR/artifacts/k6/$RUN_ID}"
mkdir -p "$RESULTS_DIR"
# Runs the `prepare_k6_fixture` management command for $RUN_ID and prints its
# JSON output on stdout.
#
# The command is first attempted inside the running `backend` compose service;
# if that service is not reachable or the command fails there, it is retried
# locally through `uv run` from src/backend.
#
# Returns non-zero when no attempt succeeded. The explicit return matters:
# this function is called inside $(...), where bash clears `errexit`, so a
# failing command would otherwise be followed by a successful `echo`.
prepare_fixture() {
  local output=""
  local -a fixture_cmd=(
    python manage.py prepare_k6_fixture
    --run-id "$RUN_ID"
    --json
  )

  if (
    cd "$ROOT_DIR" &&
      docker compose exec -T backend true >/dev/null 2>&1
  ); then
    if output="$(
      cd "$ROOT_DIR" &&
        docker compose exec -T backend "${fixture_cmd[@]}"
    )"; then
      printf '%s\n' "$output"
      return 0
    fi
    echo "prepare_k6_fixture failed in the backend container; retrying with local 'uv run'" >&2
  fi

  # `&&` keeps uv from running in the wrong directory if the cd fails.
  if ! output="$(
    cd "$ROOT_DIR/src/backend" &&
      uv run "${fixture_cmd[@]}"
  )"; then
    echo "prepare_k6_fixture failed via local 'uv run'" >&2
    return 1
  fi
  printf '%s\n' "$output"
}
# Main flow: prepare the fixture, record the effective settings, run k6 in a
# container and report where the artifacts were written.

if ! command -v jq >/dev/null 2>&1; then
  echo "jq is required" >&2
  exit 1
fi

# Payload knobs read by decide.js; defaults mirror the script's own defaults.
SUBJECT_PREFIX="${SUBJECT_PREFIX:-k6_subject}"
SUBJECT_POOL="${SUBJECT_POOL:-20000}"
THINK_TIME_SECONDS="${THINK_TIME_SECONDS:-0}"
# The k6 image runs as a non-root user, which cannot write into a results
# directory created by the invoking user on Linux hosts; run as the caller.
K6_DOCKER_USER="${K6_DOCKER_USER:-$(id -u):$(id -g)}"

if ! FIXTURE_JSON="$(prepare_fixture)"; then
  echo "failed to prepare k6 fixture" >&2
  exit 1
fi
printf '%s\n' "$FIXTURE_JSON" >"$RESULTS_DIR/fixture.json"

FLAG_KEY="$(jq -r '.flag_key' <<<"$FIXTURE_JSON")"
SUBJECT_COUNTRY="$(jq -r '.subject_attributes.country // "US"' <<<"$FIXTURE_JSON")"

if [[ -z "$FLAG_KEY" || "$FLAG_KEY" == "null" ]]; then
  echo "failed to resolve FLAG_KEY from fixture" >&2
  exit 1
fi

# Snapshot of the effective settings, for reproducible reruns.
cat >"$RESULTS_DIR/run.env" <<EOF
RUN_ID=$RUN_ID
K6_IMAGE=$K6_IMAGE
K6_BASE_URL=$K6_BASE_URL
FLAG_KEY=$FLAG_KEY
SUBJECT_COUNTRY=$SUBJECT_COUNTRY
SUBJECT_PREFIX=$SUBJECT_PREFIX
SUBJECT_POOL=$SUBJECT_POOL
THINK_TIME_SECONDS=$THINK_TIME_SECONDS
START_RPS=$START_RPS
RAMP_UP_RPS=$RAMP_UP_RPS
HOLD_RPS=$HOLD_RPS
PRE_ALLOCATED_VUS=$PRE_ALLOCATED_VUS
MAX_VUS=$MAX_VUS
RAMP_UP_DURATION=$RAMP_UP_DURATION
HOLD_DURATION=$HOLD_DURATION
RAMP_DOWN_DURATION=$RAMP_DOWN_DURATION
THRESHOLD_ERROR_RATE=$THRESHOLD_ERROR_RATE
THRESHOLD_P95_MS=$THRESHOLD_P95_MS
THRESHOLD_P99_MS=$THRESHOLD_P99_MS
EOF

# Capture the exit code instead of letting `set -e` abort, so the results
# location is still printed when k6 fails (e.g. on crossed thresholds).
k6_status=0
docker run --rm -i \
  --user "$K6_DOCKER_USER" \
  --add-host host.docker.internal:host-gateway \
  -v "$ROOT_DIR/infrastructure/k6:/k6:ro" \
  -v "$RESULTS_DIR:/results" \
  -e BASE_URL="$K6_BASE_URL" \
  -e FLAG_KEY="$FLAG_KEY" \
  -e SUBJECT_COUNTRY="$SUBJECT_COUNTRY" \
  -e SUBJECT_PREFIX="$SUBJECT_PREFIX" \
  -e SUBJECT_POOL="$SUBJECT_POOL" \
  -e THINK_TIME_SECONDS="$THINK_TIME_SECONDS" \
  -e START_RPS="$START_RPS" \
  -e RAMP_UP_RPS="$RAMP_UP_RPS" \
  -e HOLD_RPS="$HOLD_RPS" \
  -e PRE_ALLOCATED_VUS="$PRE_ALLOCATED_VUS" \
  -e MAX_VUS="$MAX_VUS" \
  -e RAMP_UP_DURATION="$RAMP_UP_DURATION" \
  -e HOLD_DURATION="$HOLD_DURATION" \
  -e RAMP_DOWN_DURATION="$RAMP_DOWN_DURATION" \
  -e THRESHOLD_ERROR_RATE="$THRESHOLD_ERROR_RATE" \
  -e THRESHOLD_P95_MS="$THRESHOLD_P95_MS" \
  -e THRESHOLD_P99_MS="$THRESHOLD_P99_MS" \
  "$K6_IMAGE" run \
  --summary-export /results/summary.json \
  /k6/decide.js || k6_status=$?

echo "results: $RESULTS_DIR"
exit "$k6_status"
@@ -0,0 +1,287 @@
import json
from decimal import Decimal
from typing import override
from django.core.management.base import (
BaseCommand,
CommandError,
CommandParser,
)
from apps.experiments.models import Experiment, ExperimentStatus
from apps.experiments.services import (
experiment_approve,
experiment_create,
experiment_reopen,
experiment_resume,
experiment_start,
experiment_submit_for_review,
variant_create,
)
from apps.flags.models import FeatureFlag
from apps.flags.selectors import feature_flag_get_by_key
from apps.flags.services import feature_flag_create
from apps.reviews.models import ApproverGroup
from apps.users.models import User, UserRole
class Command(BaseCommand):
    """Prepare the flag/experiment fixture used by the k6 ``decide`` load test.

    For a given ``--run-id`` the command makes sure a feature flag and an
    experiment on that flag exist, drives the experiment to the running
    status and prints a description of the fixture (as JSON with ``--json``).
    Existing flags and non-terminal experiments are reused, so the command
    can be invoked repeatedly with the same run id.
    """

    help = "Prepare deterministic fixture for k6 decide load tests"

    @override
    def add_arguments(self, parser: CommandParser) -> None:
        """Register the command-line options."""
        parser.add_argument(
            "--run-id",
            required=True,
            type=str,
        )
        parser.add_argument(
            "--owner",
            default="experimenter",
            type=str,
        )
        parser.add_argument(
            "--approver",
            default="approver",
            type=str,
        )
        parser.add_argument(
            "--flag-key",
            default=None,
            type=str,
        )
        parser.add_argument(
            "--experiment-name",
            default=None,
            type=str,
        )
        parser.add_argument(
            "--targeting-rules",
            default='country == "US"',
            type=str,
        )
        parser.add_argument(
            "--json",
            action="store_true",
            default=False,
        )

    @override
    def handle(self, *args, **options) -> None:
        """Build (or reuse) the fixture and report it on stdout.

        Raises:
            CommandError: when the run id is empty, a user is missing or has
                the wrong role, or the experiment cannot be moved to running.
        """
        run_id_raw: str = options["run_id"]
        owner_username: str = options["owner"]
        approver_username: str = options["approver"]
        flag_key_override: str | None = options["flag_key"]
        experiment_name_override: str | None = options["experiment_name"]
        targeting_rules: str = options["targeting_rules"]
        is_json: bool = options["json"]

        run_id = self._normalize_run_id(run_id_raw)
        owner = self._load_user(owner_username, UserRole.EXPERIMENTER)
        approver = self._load_user(approver_username, UserRole.APPROVER)
        self._ensure_approver_group(owner, approver)

        # Names default to values derived from the run id.
        flag_key = flag_key_override or f"k6_{run_id}_flag"
        experiment_name = experiment_name_override or f"k6_{run_id}_experiment"

        flag = self._ensure_flag(flag_key, run_id)
        experiment, created = self._resolve_experiment(
            flag=flag,
            owner=owner,
            name=experiment_name,
            targeting_rules=targeting_rules,
        )
        # Variants are checked for fresh experiments and for reused drafts:
        # an earlier run may have created the experiment but stopped before
        # adding variants. _ensure_variants does nothing if any variant exists.
        if created or experiment.status == ExperimentStatus.DRAFT:
            self._ensure_variants(experiment=experiment, owner=owner)

        experiment = self._ensure_running(
            experiment=experiment,
            owner=owner,
            approver=approver,
        )

        payload = {
            "run_id": run_id,
            "flag_id": str(flag.pk),
            "flag_key": flag.key,
            "experiment_id": str(experiment.pk),
            "experiment_status": experiment.status,
            "owner": owner.username,
            "approver": approver.username,
            "subject_attributes": {"country": "US"},
        }

        # Machine-readable mode: a single JSON document and nothing else.
        if is_json:
            self.stdout.write(json.dumps(payload))
            return

        self.stdout.write(
            self.style.SUCCESS(
                f"k6 fixture ready: flag_key={flag.key}, "
                f"experiment_id={experiment.pk}, status={experiment.status}"
            )
        )
        self.stdout.write(json.dumps(payload, indent=2))

    def _normalize_run_id(self, value: str) -> str:
        """Turn an arbitrary run id into an identifier-like string.

        Alphanumeric characters are lower-cased, every other character
        becomes ``_``, leading/trailing underscores are dropped and ``r_`` is
        prepended when the result does not start with a letter.

        Raises:
            CommandError: when nothing is left after normalization.
        """
        normalized = "".join(
            ch.lower() if ch.isalnum() else "_" for ch in value.strip()
        ).strip("_")
        if not normalized:
            raise CommandError("run-id cannot be empty.")
        if not normalized[0].isalpha():
            normalized = f"r_{normalized}"
        return normalized

    def _load_user(self, username: str, expected_role: str) -> User:
        """Return the user with ``username``, requiring ``expected_role``.

        Raises:
            CommandError: when the user does not exist or has another role.
        """
        user = User.objects.filter(username=username).first()
        if user is None:
            raise CommandError(
                f"User '{username}' was not found. Seed users before running."
            )
        if user.role != expected_role:
            raise CommandError(
                f"User '{username}' must have role '{expected_role}'."
            )
        return user

    def _ensure_approver_group(self, owner: User, approver: User) -> None:
        """Make sure ``owner`` has an approver group that needs exactly one
        approval and contains ``approver``."""
        group, _ = ApproverGroup.objects.get_or_create(
            experimenter=owner,
            defaults={"min_approvals": 1},
        )
        # An existing group may require more approvals than the single
        # approver used by this command can give.
        if group.min_approvals != 1:
            group.min_approvals = 1
            group.save(update_fields=["min_approvals", "updated_at"])
        if not group.approvers.filter(pk=approver.pk).exists():
            group.approvers.add(approver)

    def _ensure_flag(self, key: str, run_id: str) -> FeatureFlag:
        """Return the flag with ``key``, creating a string flag with default
        value ``"control"`` when it does not exist yet."""
        flag = feature_flag_get_by_key(key)
        if flag:
            return flag
        return feature_flag_create(
            key=key,
            name=f"k6 {run_id} decide",
            value_type="string",
            default_value="control",
        )

    def _resolve_experiment(
        self,
        *,
        flag: FeatureFlag,
        owner: User,
        name: str,
        targeting_rules: str,
    ) -> tuple[Experiment, bool]:
        """Find a reusable experiment on ``flag`` or create a new one.

        The most recently created experiment in one of the listed
        non-terminal statuses is reused as is; ``name`` and
        ``targeting_rules`` only apply to a newly created experiment.

        Returns:
            ``(experiment, created)`` where ``created`` is True only when a
            new experiment was created by this call.
        """
        reusable = (
            Experiment.objects.filter(
                flag=flag,
                status__in=(
                    ExperimentStatus.RUNNING,
                    ExperimentStatus.PAUSED,
                    ExperimentStatus.APPROVED,
                    ExperimentStatus.IN_REVIEW,
                    ExperimentStatus.DRAFT,
                    ExperimentStatus.REJECTED,
                ),
            )
            .order_by("-created_at")
            .first()
        )
        if reusable:
            return reusable, False

        experiment = experiment_create(
            flag=flag,
            name=name,
            owner=owner,
            description="k6 decide benchmark fixture",
            hypothesis="k6 baseline",
            traffic_allocation=Decimal("100.00"),
            targeting_rules=targeting_rules,
        )
        return experiment, True

    def _ensure_variants(self, *, experiment: Experiment, owner: User) -> None:
        """Create a 50/50 ``control``/``treatment`` pair unless the
        experiment already has at least one variant."""
        if experiment.variants.exists():
            return
        variant_create(
            experiment=experiment,
            user=owner,
            name="control",
            value="control",
            weight=Decimal("50.00"),
            is_control=True,
        )
        variant_create(
            experiment=experiment,
            user=owner,
            name="treatment",
            value="treatment",
            weight=Decimal("50.00"),
            is_control=False,
        )

    def _ensure_running(
        self,
        *,
        experiment: Experiment,
        owner: User,
        approver: User,
    ) -> Experiment:
        """Walk the experiment through its workflow until it is running.

        Depending on the current status the experiment is reopened
        (rejected), submitted for review (draft), approved (in review),
        started (approved) or resumed (paused). The steps are checked in
        that order, so one call can take a draft all the way to running;
        each service call is expected to return the updated experiment.

        Raises:
            CommandError: when the experiment is completed/archived, stays
                in review after approval, or does not end up running.
        """
        # Reload so the status reflects the database, not a stale instance.
        current = Experiment.objects.select_related("flag", "owner").get(
            pk=experiment.pk
        )
        status = current.status

        if status in {
            ExperimentStatus.COMPLETED,
            ExperimentStatus.ARCHIVED,
        }:
            raise CommandError(
                "Reusable experiment is completed/archived. Use a new run-id."
            )

        if status == ExperimentStatus.REJECTED:
            current = experiment_reopen(experiment=current, user=owner)
            status = current.status

        if status == ExperimentStatus.DRAFT:
            current = experiment_submit_for_review(
                experiment=current,
                user=owner,
            )
            status = current.status

        if status == ExperimentStatus.IN_REVIEW:
            # Do not approve twice when this approver already approved.
            if not current.approvals.filter(approver=approver).exists():
                current = experiment_approve(
                    experiment=current,
                    approver=approver,
                    comment="k6 fixture approval",
                )
            status = current.status
            if status == ExperimentStatus.IN_REVIEW:
                raise CommandError(
                    "Experiment still in_review after approval. "
                    "Check review policy for owner."
                )

        if status == ExperimentStatus.APPROVED:
            current = experiment_start(experiment=current, user=owner)
            status = current.status

        if status == ExperimentStatus.PAUSED:
            current = experiment_resume(experiment=current, user=owner)
            status = current.status

        if status != ExperimentStatus.RUNNING:
            # Report the status stored in the database in the error message.
            current = Experiment.objects.get(pk=current.pk)
            raise CommandError(
                "Failed to move experiment to running. "
                f"Current={current.status}"
            )
        return current