From f56a26836d1417660b48973434fb00fd0b9b1aa9 Mon Sep 17 00:00:00 2001 From: ITQ Date: Tue, 24 Feb 2026 19:47:49 +0300 Subject: [PATCH] feat(loadtest): added loadtesting with k6 --- .gitignore | 3 + MAP.md | 4 +- README.md | 10 +- infrastructure/k6/README.md | 83 +++++ infrastructure/k6/decide.js | 121 ++++++++ infrastructure/k6/run-decide.sh | 113 +++++++ .../management/commands/prepare_k6_fixture.py | 287 ++++++++++++++++++ 7 files changed, 618 insertions(+), 3 deletions(-) create mode 100644 infrastructure/k6/README.md create mode 100644 infrastructure/k6/decide.js create mode 100755 infrastructure/k6/run-decide.sh create mode 100644 src/backend/apps/decision/management/commands/prepare_k6_fixture.py diff --git a/.gitignore b/.gitignore index 55e51e6..d9c1217 100644 --- a/.gitignore +++ b/.gitignore @@ -27,3 +27,6 @@ Temporary Items # Env files .env + +# Generated artifacts +artifacts/ diff --git a/MAP.md b/MAP.md index 4fb99a2..0cee5e6 100644 --- a/MAP.md +++ b/MAP.md @@ -65,6 +65,6 @@ ## 6. Наблюдаемость и эксплуатация - Health/readiness endpoints: [src/backend/api/urls.py](./src/backend/api/urls.py) -- Prometheus middleware/logging config: [src/backend/config/settings.py](./src/backend/config/settings.py) -- Structured logs: [src/backend/config/settings.py](./src/backend/config/settings.py) +- Prometheus middleware/logging config: [src/backend/config/settings/base.py](./src/backend/config/settings/base.py) +- Structured logs: [src/backend/config/settings/base.py](./src/backend/config/settings/base.py) - CI/CD config: [.gitlab-ci.yml](./.gitlab-ci.yml) diff --git a/README.md b/README.md index b8db21e..78235e6 100644 --- a/README.md +++ b/README.md @@ -81,7 +81,7 @@ Please note that by default containers will use ports 80 (reverse proxy) and ran #### 1. Configuration - Docker compose configuration files are stored in [deploy/compose](./deploy/compose). -- Configuration files for containers are stored in [infrastrucutre/configs](./infrastrucutre/configs). 
+- Configuration files for containers are stored in [infrastructure/configs](./infrastructure/configs). Env could be customized by creating `.env` file in each service config directory, it will automatically override the default values from `.env.template`. - Ports on which containers will be accessible are defined in [.env.template](./.env.template). This could be customized by creating `.env` file in the root directory and patching the following lines compose you are running: @@ -133,3 +133,11 @@ Example run: ![metrics](./assets/images/metrics.png) System metrics (gc, requests, etc.) and several business metrics (`lotty_decide_requests_total`, `lotty_events_ingested_total`). + +## Load testing (k6) + +Reproducible k6 profile for `POST /api/v1/decide`: + +- scenario script: [infrastructure/k6/decide.js](./infrastructure/k6/decide.js) +- runner: [infrastructure/k6/run-decide.sh](./infrastructure/k6/run-decide.sh) +- guide: [infrastructure/k6/README.md](./infrastructure/k6/README.md) diff --git a/infrastructure/k6/README.md b/infrastructure/k6/README.md new file mode 100644 index 0000000..6c92890 --- /dev/null +++ b/infrastructure/k6/README.md @@ -0,0 +1,83 @@ +# k6 Load Testing + +Reproducible load test profile for `POST /api/v1/decide`. + +## Prerequisites + +- Docker + Docker Compose +- `jq` +- Running stack (`docker compose -f compose.yaml up -d`) + +## One-command run + +```bash +./infrastructure/k6/run-decide.sh +``` + +This command: + +1. Prepares deterministic fixture via `prepare_k6_fixture`. +2. Runs `grafana/k6` in a pinned container image. +3. Saves artifacts to `artifacts/k6/$RUN_ID/`. + +Artifacts: + +- `fixture.json` +- `run.env` +- `summary.json` + +## Reproducible rerun + +Use the same `RUN_ID` and k6 profile parameters. 
+ +```bash +RUN_ID=baseline_20260224 \ +START_RPS=20 \ +RAMP_UP_RPS=200 \ +HOLD_RPS=200 \ +HOLD_DURATION=2m \ +./infrastructure/k6/run-decide.sh +``` + +## Target URL + +Default target for k6 container: + +- `K6_BASE_URL=http://host.docker.internal` + +Override if needed: + +```bash +K6_BASE_URL=http://host.docker.internal:14609 ./infrastructure/k6/run-decide.sh +``` + +## Profile knobs + +- `START_RPS` +- `RAMP_UP_RPS` +- `HOLD_RPS` +- `RAMP_UP_DURATION` +- `HOLD_DURATION` +- `RAMP_DOWN_DURATION` +- `PRE_ALLOCATED_VUS` +- `MAX_VUS` +- `THRESHOLD_ERROR_RATE` +- `THRESHOLD_P95_MS` +- `THRESHOLD_P99_MS` +- `K6_IMAGE` + +## Compare two runs + +```bash +BASE=artifacts/k6/baseline_20260224/summary.json +CAND=artifacts/k6/candidate_20260224/summary.json + +jq -n --argfile b "$BASE" --argfile c "$CAND" '{ + baseline_p95_ms: $b.metrics.http_req_duration["p(95)"], + candidate_p95_ms: $c.metrics.http_req_duration["p(95)"], + baseline_req_per_s: $b.metrics.http_reqs.rate, + candidate_req_per_s: $c.metrics.http_reqs.rate, + baseline_error_rate: $b.metrics.http_req_failed.value, + candidate_error_rate: $c.metrics.http_req_failed.value +}' +``` diff --git a/infrastructure/k6/decide.js b/infrastructure/k6/decide.js new file mode 100644 index 0000000..2c19242 --- /dev/null +++ b/infrastructure/k6/decide.js @@ -0,0 +1,121 @@ +import http from "k6/http"; +import { check, sleep } from "k6"; +import { Counter, Rate, Trend } from "k6/metrics"; + +const BASE_URL = (__ENV.BASE_URL || "http://host.docker.internal").replace( + /\/$/, + "", +); +const API_URL = `${BASE_URL}/api/v1`; +const FLAG_KEY = __ENV.FLAG_KEY || ""; +const SUBJECT_PREFIX = __ENV.SUBJECT_PREFIX || "k6_subject"; +const SUBJECT_COUNTRY = __ENV.SUBJECT_COUNTRY || "US"; +const SUBJECT_POOL = Number(__ENV.SUBJECT_POOL || "20000"); +const THINK_TIME_SECONDS = Number(__ENV.THINK_TIME_SECONDS || "0"); + +const START_RATE = Number(__ENV.START_RPS || "20"); +const RAMP_UP_RATE = Number(__ENV.RAMP_UP_RPS || "200"); +const 
HOLD_RATE = Number(__ENV.HOLD_RPS || "200"); +const PRE_ALLOCATED_VUS = Number(__ENV.PRE_ALLOCATED_VUS || "100"); +const MAX_VUS = Number(__ENV.MAX_VUS || "600"); + +const RAMP_UP_DURATION = __ENV.RAMP_UP_DURATION || "30s"; +const HOLD_DURATION = __ENV.HOLD_DURATION || "2m"; +const RAMP_DOWN_DURATION = __ENV.RAMP_DOWN_DURATION || "20s"; + +const THRESHOLD_ERROR_RATE = __ENV.THRESHOLD_ERROR_RATE || "0.01"; +const THRESHOLD_P95_MS = __ENV.THRESHOLD_P95_MS || "250"; +const THRESHOLD_P99_MS = __ENV.THRESHOLD_P99_MS || "500"; + +if (!FLAG_KEY) { + throw new Error("FLAG_KEY is required"); +} + +export const options = { + scenarios: { + decide_hot_path: { + executor: "ramping-arrival-rate", + startRate: START_RATE, + timeUnit: "1s", + preAllocatedVUs: PRE_ALLOCATED_VUS, + maxVUs: MAX_VUS, + stages: [ + { target: RAMP_UP_RATE, duration: RAMP_UP_DURATION }, + { target: HOLD_RATE, duration: HOLD_DURATION }, + { target: 0, duration: RAMP_DOWN_DURATION }, + ], + }, + }, + thresholds: { + http_req_failed: [`rate<${THRESHOLD_ERROR_RATE}`], + http_req_duration: [ + `p(95)<${THRESHOLD_P95_MS}`, + `p(99)<${THRESHOLD_P99_MS}`, + ], + decide_status_200_rate: ["rate>0.99"], + }, + summaryTrendStats: [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max", + ], +}; + +const decideStatus200Rate = new Rate("decide_status_200_rate"); +const decideAssignedRate = new Rate("decide_experiment_assigned_rate"); +const decideRequests = new Counter("decide_requests_total"); +const decideDuration = new Trend("decide_request_duration_ms", true); + +function buildSubjectId() { + const idx = ((__ITER * 104729 + __VU * 8191) % SUBJECT_POOL) + 1; + return `${SUBJECT_PREFIX}_${idx}`; +} + +function buildPayload() { + return JSON.stringify({ + subject_id: buildSubjectId(), + subject_attributes: { country: SUBJECT_COUNTRY }, + flags: [FLAG_KEY], + }); +} + +export default function () { + const response = http.post(`${API_URL}/decide`, buildPayload(), { + headers: { "Content-Type": 
"application/json" }, + tags: { endpoint: "decide" }, + }); + + decideRequests.add(1); + decideDuration.add(response.timings.duration); + decideStatus200Rate.add(response.status === 200); + + let reason = ""; + if (response.status === 200) { + const body = response.json(); + if (body && body.decisions && body.decisions.length > 0) { + reason = String(body.decisions[0].reason || ""); + } + } + decideAssignedRate.add(reason === "experiment_assigned"); + + check(response, { + "status is 200": (r) => r.status === 200, + "has one decision": (r) => { + const body = r.json(); + return ( + body !== null && + typeof body === "object" && + Array.isArray(body.decisions) && + body.decisions.length === 1 + ); + }, + }); + + if (THINK_TIME_SECONDS > 0) { + sleep(THINK_TIME_SECONDS); + } +} diff --git a/infrastructure/k6/run-decide.sh b/infrastructure/k6/run-decide.sh new file mode 100755 index 0000000..3ed13b9 --- /dev/null +++ b/infrastructure/k6/run-decide.sh @@ -0,0 +1,113 @@ +#!/usr/bin/env bash + +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." 
&& pwd)" + +RUN_ID="${RUN_ID:-$(date -u +%Y%m%d%H%M%S)}" +K6_IMAGE="${K6_IMAGE:-grafana/k6:0.50.0}" +K6_BASE_URL="${K6_BASE_URL:-http://host.docker.internal}" + +START_RPS="${START_RPS:-20}" +RAMP_UP_RPS="${RAMP_UP_RPS:-200}" +HOLD_RPS="${HOLD_RPS:-200}" +PRE_ALLOCATED_VUS="${PRE_ALLOCATED_VUS:-100}" +MAX_VUS="${MAX_VUS:-600}" +RAMP_UP_DURATION="${RAMP_UP_DURATION:-30s}" +HOLD_DURATION="${HOLD_DURATION:-2m}" +RAMP_DOWN_DURATION="${RAMP_DOWN_DURATION:-20s}" + +THRESHOLD_ERROR_RATE="${THRESHOLD_ERROR_RATE:-0.01}" +THRESHOLD_P95_MS="${THRESHOLD_P95_MS:-250}" +THRESHOLD_P99_MS="${THRESHOLD_P99_MS:-500}" + +RESULTS_DIR="${RESULTS_DIR:-$ROOT_DIR/artifacts/k6/$RUN_ID}" +mkdir -p "$RESULTS_DIR" + +prepare_fixture() { + local output="" + + if ( + cd "$ROOT_DIR" && + docker compose exec -T backend true >/dev/null 2>&1 + ); then + if output="$( + cd "$ROOT_DIR" && + docker compose exec -T backend python manage.py prepare_k6_fixture \ + --run-id "$RUN_ID" \ + --json + )"; then + echo "$output" + return + fi + fi + + output="$( + cd "$ROOT_DIR/src/backend" + uv run python manage.py prepare_k6_fixture \ + --run-id "$RUN_ID" \ + --json + )" + echo "$output" +} + +if ! 
command -v jq >/dev/null 2>&1; then + echo "jq is required" >&2 + exit 1 +fi + +FIXTURE_JSON="$(prepare_fixture)" +echo "$FIXTURE_JSON" >"$RESULTS_DIR/fixture.json" + +FLAG_KEY="$(echo "$FIXTURE_JSON" | jq -r '.flag_key')" +SUBJECT_COUNTRY="$( + echo "$FIXTURE_JSON" | jq -r '.subject_attributes.country // "US"' +)" + +if [[ -z "$FLAG_KEY" || "$FLAG_KEY" == "null" ]]; then + echo "failed to resolve FLAG_KEY from fixture" >&2 + exit 1 +fi + +cat >"$RESULTS_DIR/run.env" < None: + parser.add_argument( + "--run-id", + required=True, + type=str, + ) + parser.add_argument( + "--owner", + default="experimenter", + type=str, + ) + parser.add_argument( + "--approver", + default="approver", + type=str, + ) + parser.add_argument( + "--flag-key", + default=None, + type=str, + ) + parser.add_argument( + "--experiment-name", + default=None, + type=str, + ) + parser.add_argument( + "--targeting-rules", + default='country == "US"', + type=str, + ) + parser.add_argument( + "--json", + action="store_true", + default=False, + ) + + @override + def handle(self, *args, **options) -> None: + run_id_raw: str = options["run_id"] + owner_username: str = options["owner"] + approver_username: str = options["approver"] + flag_key_override: str | None = options["flag_key"] + experiment_name_override: str | None = options["experiment_name"] + targeting_rules: str = options["targeting_rules"] + is_json: bool = options["json"] + + run_id = self._normalize_run_id(run_id_raw) + owner = self._load_user(owner_username, UserRole.EXPERIMENTER) + approver = self._load_user(approver_username, UserRole.APPROVER) + self._ensure_approver_group(owner, approver) + + flag_key = flag_key_override or f"k6_{run_id}_flag" + experiment_name = experiment_name_override or f"k6_{run_id}_experiment" + + flag = self._ensure_flag(flag_key, run_id) + experiment, created = self._resolve_experiment( + flag=flag, + owner=owner, + name=experiment_name, + targeting_rules=targeting_rules, + ) + if created: + 
self._ensure_variants(experiment=experiment, owner=owner) + + experiment = self._ensure_running( + experiment=experiment, + owner=owner, + approver=approver, + ) + + payload = { + "run_id": run_id, + "flag_id": str(flag.pk), + "flag_key": flag.key, + "experiment_id": str(experiment.pk), + "experiment_status": experiment.status, + "owner": owner.username, + "approver": approver.username, + "subject_attributes": {"country": "US"}, + } + + if is_json: + self.stdout.write(json.dumps(payload)) + return + + self.stdout.write( + self.style.SUCCESS( + f"k6 fixture ready: flag_key={flag.key}, " + f"experiment_id={experiment.pk}, status={experiment.status}" + ) + ) + self.stdout.write(json.dumps(payload, indent=2)) + + def _normalize_run_id(self, value: str) -> str: + normalized = "".join( + ch.lower() if ch.isalnum() else "_" for ch in value.strip() + ).strip("_") + if not normalized: + raise CommandError("run-id cannot be empty.") + if not normalized[0].isalpha(): + normalized = f"r_{normalized}" + return normalized + + def _load_user(self, username: str, expected_role: str) -> User: + user = User.objects.filter(username=username).first() + if user is None: + raise CommandError( + f"User '{username}' was not found. Seed users before running." + ) + if user.role != expected_role: + raise CommandError( + f"User '{username}' must have role '{expected_role}'." 
+ ) + return user + + def _ensure_approver_group(self, owner: User, approver: User) -> None: + group, _ = ApproverGroup.objects.get_or_create( + experimenter=owner, + defaults={"min_approvals": 1}, + ) + if group.min_approvals != 1: + group.min_approvals = 1 + group.save(update_fields=["min_approvals", "updated_at"]) + if not group.approvers.filter(pk=approver.pk).exists(): + group.approvers.add(approver) + + def _ensure_flag(self, key: str, run_id: str) -> FeatureFlag: + flag = feature_flag_get_by_key(key) + if flag: + return flag + return feature_flag_create( + key=key, + name=f"k6 {run_id} decide", + value_type="string", + default_value="control", + ) + + def _resolve_experiment( + self, + *, + flag: FeatureFlag, + owner: User, + name: str, + targeting_rules: str, + ) -> tuple[Experiment, bool]: + reusable = ( + Experiment.objects.filter( + flag=flag, + status__in=( + ExperimentStatus.RUNNING, + ExperimentStatus.PAUSED, + ExperimentStatus.APPROVED, + ExperimentStatus.IN_REVIEW, + ExperimentStatus.DRAFT, + ExperimentStatus.REJECTED, + ), + ) + .order_by("-created_at") + .first() + ) + if reusable: + return reusable, False + + experiment = experiment_create( + flag=flag, + name=name, + owner=owner, + description="k6 decide benchmark fixture", + hypothesis="k6 baseline", + traffic_allocation=Decimal("100.00"), + targeting_rules=targeting_rules, + ) + return experiment, True + + def _ensure_variants(self, *, experiment: Experiment, owner: User) -> None: + if experiment.variants.exists(): + return + variant_create( + experiment=experiment, + user=owner, + name="control", + value="control", + weight=Decimal("50.00"), + is_control=True, + ) + variant_create( + experiment=experiment, + user=owner, + name="treatment", + value="treatment", + weight=Decimal("50.00"), + is_control=False, + ) + + def _ensure_running( + self, + *, + experiment: Experiment, + owner: User, + approver: User, + ) -> Experiment: + current = Experiment.objects.select_related("flag", "owner").get( + 
pk=experiment.pk + ) + status = current.status + + if status in { + ExperimentStatus.COMPLETED, + ExperimentStatus.ARCHIVED, + }: + raise CommandError( + "Reusable experiment is completed/archived. Use a new run-id." + ) + + if status == ExperimentStatus.REJECTED: + current = experiment_reopen(experiment=current, user=owner) + status = current.status + + if status == ExperimentStatus.DRAFT: + current = experiment_submit_for_review( + experiment=current, + user=owner, + ) + status = current.status + + if status == ExperimentStatus.IN_REVIEW: + if not current.approvals.filter(approver=approver).exists(): + current = experiment_approve( + experiment=current, + approver=approver, + comment="k6 fixture approval", + ) + status = current.status + if status == ExperimentStatus.IN_REVIEW: + raise CommandError( + "Experiment still in_review after approval. " + "Check review policy for owner." + ) + + if status == ExperimentStatus.APPROVED: + current = experiment_start(experiment=current, user=owner) + status = current.status + + if status == ExperimentStatus.PAUSED: + current = experiment_resume(experiment=current, user=owner) + status = current.status + + if status != ExperimentStatus.RUNNING: + current = Experiment.objects.get(pk=current.pk) + raise CommandError( + "Failed to move experiment to running. " + f"Current={current.status}" + ) + + return current