test(integration): added integration tests

2026-02-23 11:48:03 +03:00
parent 3003ac888b
commit 15d80532e0
8 changed files with 1456 additions and 0 deletions
@@ -0,0 +1,290 @@
+from decimal import Decimal
+from typing import override
+
+from django.core.cache import cache
+from django.test import TestCase
+from django.utils import timezone
+
+from apps.decision.services import decide_for_flag
+from apps.events.services import process_events_batch
+from apps.events.tests.helpers import make_event_type, make_exposure_type
+from apps.experiments.models import ExperimentOutcome, ExperimentStatus
+from apps.experiments.services import (
+    experiment_approve,
+    experiment_start,
+    experiment_submit_for_review,
+)
+from apps.experiments.tests.helpers import add_two_variants, make_experiment
+from apps.guardrails.models import GuardrailAction, GuardrailTrigger
+from apps.guardrails.services import (
+    check_all_running_experiments,
+    check_experiment_guardrails,
+    guardrail_create,
+)
+from apps.metrics.services import (
+    experiment_metric_add,
+    metric_definition_create,
+)
+from apps.reviews.services import review_settings_update
+from apps.reviews.tests.helpers import make_approver, make_experimenter
+
+
+def _start_experiment(owner, approver, suffix, traffic=Decimal("100.00")):
+    experiment = make_experiment(
+        owner=owner,
+        suffix=suffix,
+        traffic_allocation=traffic,
+    )
+    add_two_variants(experiment)
+    experiment = experiment_submit_for_review(
+        experiment=experiment, user=owner
+    )
+    experiment = experiment_approve(experiment=experiment, approver=approver)
+    return experiment_start(experiment=experiment, user=owner)
+
+
+class GuardrailPauseIntegrationTest(TestCase):
+    @override
+    def setUp(self) -> None:
+        cache.clear()
+        review_settings_update(
+            default_min_approvals=1,
+            allow_any_approver=True,
+        )
+        self.owner = make_experimenter("_gpi")
+        self.approver = make_approver("_gpi")
+
+        self.experiment = _start_experiment(self.owner, self.approver, "_gpi")
+
+        self.error_metric = metric_definition_create(
+            key="error_rate_gpi",
+            name="Error Rate",
+            metric_type="ratio",
+            direction="lower_is_better",
+            calculation_rule={
+                "numerator_event": "gpi_error",
+                "denominator_event": "gpi_exposure",
+            },
+        )
+        experiment_metric_add(
+            experiment=self.experiment,
+            metric=self.error_metric,
+        )
+
+        make_exposure_type(name="gpi_exposure")
+        make_event_type(name="gpi_error", display_name="Error")
+
+        guardrail_create(
+            experiment=self.experiment,
+            metric=self.error_metric,
+            threshold=Decimal("0.10"),
+            observation_window_minutes=60,
+            action=GuardrailAction.PAUSE,
+        )
+
+    def test_guardrail_pauses_experiment_on_threshold_breach(self) -> None:
+        now = timezone.now().isoformat()
+        cache.clear()
+        d = decide_for_flag("flag_gpi", "user_gp1", {})
+        self.assertEqual(d["reason"], "experiment_assigned")
+
+        process_events_batch(
+            [
+                {
+                    "event_id": "gpi_exp_1",
+                    "event_type": "gpi_exposure",
+                    "decision_id": d["decision_id"],
+                    "subject_id": "user_gp1",
+                    "timestamp": now,
+                    "properties": {},
+                },
+                {
+                    "event_id": "gpi_err_1",
+                    "event_type": "gpi_error",
+                    "decision_id": d["decision_id"],
+                    "subject_id": "user_gp1",
+                    "timestamp": now,
+                    "properties": {},
+                },
+            ]
+        )
+
+        triggers = check_experiment_guardrails(self.experiment)
+        self.assertTrue(len(triggers) > 0)
+
+        self.experiment.refresh_from_db()
+        self.assertEqual(self.experiment.status, ExperimentStatus.PAUSED)
+        self.assertTrue(
+            GuardrailTrigger.objects.filter(
+                experiment=self.experiment
+            ).exists()
+        )
+
+    def test_no_trigger_when_metric_below_threshold(self) -> None:
+        now = timezone.now().isoformat()
+        cache.clear()
+        decisions = []
+        for i in range(10):
+            cache.clear()
+            d = decide_for_flag("flag_gpi", f"user_ok_{i}", {})
+            decisions.append(d)
+
+        events = [
+            {
+                "event_id": f"gpi_ok_exp_{i}",
+                "event_type": "gpi_exposure",
+                "decision_id": d["decision_id"],
+                "subject_id": f"user_ok_{i}",
+                "timestamp": now,
+                "properties": {},
+            }
+            for i, d in enumerate(decisions)
+        ]
+        process_events_batch(events)
+
+        triggers = check_experiment_guardrails(self.experiment)
+        self.assertEqual(len(triggers), 0)
+        self.experiment.refresh_from_db()
+        self.assertEqual(self.experiment.status, ExperimentStatus.RUNNING)
+
+
+class GuardrailRollbackIntegrationTest(TestCase):
+    @override
+    def setUp(self) -> None:
+        cache.clear()
+        review_settings_update(
+            default_min_approvals=1,
+            allow_any_approver=True,
+        )
+        self.owner = make_experimenter("_gri")
+        self.approver = make_approver("_gri")
+
+        self.experiment = _start_experiment(self.owner, self.approver, "_gri")
+
+        self.crash_metric = metric_definition_create(
+            key="crash_rate_gri",
+            name="Crash Rate",
+            metric_type="ratio",
+            direction="lower_is_better",
+            calculation_rule={
+                "numerator_event": "gri_crash",
+                "denominator_event": "gri_exposure",
+            },
+        )
+        experiment_metric_add(
+            experiment=self.experiment,
+            metric=self.crash_metric,
+        )
+
+        make_exposure_type(name="gri_exposure")
+        make_event_type(name="gri_crash", display_name="Crash")
+
+        guardrail_create(
+            experiment=self.experiment,
+            metric=self.crash_metric,
+            threshold=Decimal("0.05"),
+            action=GuardrailAction.ROLLBACK,
+        )
+
+    def test_rollback_completes_experiment_with_control_winner(self) -> None:
+        now = timezone.now().isoformat()
+        cache.clear()
+        d = decide_for_flag("flag_gri", "user_rb1", {})
+
+        process_events_batch(
+            [
+                {
+                    "event_id": "gri_exp_1",
+                    "event_type": "gri_exposure",
+                    "decision_id": d["decision_id"],
+                    "subject_id": "user_rb1",
+                    "timestamp": now,
+                    "properties": {},
+                },
+                {
+                    "event_id": "gri_crash_1",
+                    "event_type": "gri_crash",
+                    "decision_id": d["decision_id"],
+                    "subject_id": "user_rb1",
+                    "timestamp": now,
+                    "properties": {},
+                },
+            ]
+        )
+
+        triggers = check_experiment_guardrails(self.experiment)
+        self.assertTrue(len(triggers) > 0)
+
+        self.experiment.refresh_from_db()
+        self.assertEqual(self.experiment.status, ExperimentStatus.COMPLETED)
+
+        outcome = ExperimentOutcome.objects.get(experiment=self.experiment)
+        self.assertEqual(outcome.outcome, "rollback")
+
+
+class GuardrailCheckAllTest(TestCase):
+    @override
+    def setUp(self) -> None:
+        cache.clear()
+        review_settings_update(
+            default_min_approvals=1,
+            allow_any_approver=True,
+        )
+        self.owner = make_experimenter("_gca")
+        self.approver = make_approver("_gca")
+
+        self.exp1 = _start_experiment(self.owner, self.approver, "_gca1")
+        self.exp2 = _start_experiment(self.owner, self.approver, "_gca2")
+
+        self.metric = metric_definition_create(
+            key="err_gca",
+            name="Error",
+            metric_type="ratio",
+            direction="lower_is_better",
+            calculation_rule={
+                "numerator_event": "gca_error",
+                "denominator_event": "gca_exposure",
+            },
+        )
+        for exp in (self.exp1, self.exp2):
+            experiment_metric_add(experiment=exp, metric=self.metric)
+            guardrail_create(
+                experiment=exp,
+                metric=self.metric,
+                threshold=Decimal("0.10"),
+                action=GuardrailAction.PAUSE,
+            )
+
+        make_exposure_type(name="gca_exposure")
+        make_event_type(name="gca_error", display_name="Error")
+
+    def test_check_all_processes_multiple_experiments(self) -> None:
+        now = timezone.now().isoformat()
+
+        for suffix, _ in [("gca1", self.exp1), ("gca2", self.exp2)]:
+            cache.clear()
+            d = decide_for_flag(f"flag_{suffix}", "user_ca", {})
+            process_events_batch(
+                [
+                    {
+                        "event_id": f"{suffix}_exp",
+                        "event_type": "gca_exposure",
+                        "decision_id": d["decision_id"],
+                        "subject_id": "user_ca",
+                        "timestamp": now,
+                        "properties": {},
+                    },
+                    {
+                        "event_id": f"{suffix}_err",
+                        "event_type": "gca_error",
+                        "decision_id": d["decision_id"],
+                        "subject_id": "user_ca",
+                        "timestamp": now,
+                        "properties": {},
+                    },
+                ]
+            )
+
+        results = check_all_running_experiments()
+        self.assertEqual(results["checked"], 2)
+        self.assertGreaterEqual(results["triggered"], 1)