test(integration): added integration tests

This commit is contained in:
ITQ
2026-02-23 11:48:03 +03:00
parent 3003ac888b
commit 15d80532e0
8 changed files with 1456 additions and 0 deletions
@@ -0,0 +1,290 @@
from decimal import Decimal
from typing import override
from django.core.cache import cache
from django.test import TestCase
from django.utils import timezone
from apps.decision.services import decide_for_flag
from apps.events.services import process_events_batch
from apps.events.tests.helpers import make_event_type, make_exposure_type
from apps.experiments.models import ExperimentOutcome, ExperimentStatus
from apps.experiments.services import (
experiment_approve,
experiment_start,
experiment_submit_for_review,
)
from apps.experiments.tests.helpers import add_two_variants, make_experiment
from apps.guardrails.models import GuardrailAction, GuardrailTrigger
from apps.guardrails.services import (
check_all_running_experiments,
check_experiment_guardrails,
guardrail_create,
)
from apps.metrics.services import (
experiment_metric_add,
metric_definition_create,
)
from apps.reviews.services import review_settings_update
from apps.reviews.tests.helpers import make_approver, make_experimenter
def _start_experiment(owner, approver, suffix, traffic=Decimal("100.00")):
    """Create an experiment and walk it through review until it is started.

    The experiment is built with ``make_experiment``, given variants via
    ``add_two_variants``, submitted for review as ``owner``, approved as
    ``approver`` and finally started as ``owner``.

    Args:
        owner: User passed as the experiment owner and as the acting user
            for the submit and start steps.
        approver: User passed to ``experiment_approve``.
        suffix: Suffix forwarded to ``make_experiment``.
        traffic: Value forwarded as ``traffic_allocation``.

    Returns:
        The result of ``experiment_start`` for the approved experiment.
    """
    draft = make_experiment(
        owner=owner, suffix=suffix, traffic_allocation=traffic
    )
    add_two_variants(draft)

    # Each service call returns the experiment to feed into the next step.
    in_review = experiment_submit_for_review(experiment=draft, user=owner)
    approved = experiment_approve(experiment=in_review, approver=approver)
    started = experiment_start(experiment=approved, user=owner)
    return started
class GuardrailPauseIntegrationTest(TestCase):
    """Integration tests for a guardrail configured with the PAUSE action.

    Every test runs against a started experiment (suffix ``_gpi``) that has
    the ratio metric ``error_rate_gpi`` (``gpi_error`` over ``gpi_exposure``)
    attached, plus a guardrail on that metric with threshold 0.10 and a
    60-minute observation window.
    """

    @override
    def setUp(self) -> None:
        """Start an experiment with an error-rate metric and guardrail."""
        cache.clear()
        review_settings_update(
            default_min_approvals=1,
            allow_any_approver=True,
        )
        self.owner = make_experimenter("_gpi")
        self.approver = make_approver("_gpi")
        self.experiment = _start_experiment(self.owner, self.approver, "_gpi")
        self.error_metric = metric_definition_create(
            key="error_rate_gpi",
            name="Error Rate",
            metric_type="ratio",
            direction="lower_is_better",
            calculation_rule={
                "numerator_event": "gpi_error",
                "denominator_event": "gpi_exposure",
            },
        )
        experiment_metric_add(
            experiment=self.experiment,
            metric=self.error_metric,
        )
        # The event types named in the calculation rule are registered here,
        # after the metric definition that refers to them by name.
        make_exposure_type(name="gpi_exposure")
        make_event_type(name="gpi_error", display_name="Error")
        guardrail_create(
            experiment=self.experiment,
            metric=self.error_metric,
            threshold=Decimal("0.10"),
            observation_window_minutes=60,
            action=GuardrailAction.PAUSE,
        )

    def test_guardrail_pauses_experiment_on_threshold_breach(self) -> None:
        """One error per one exposure must trigger and pause the experiment."""
        now = timezone.now().isoformat()
        cache.clear()
        d = decide_for_flag("flag_gpi", "user_gp1", {})
        self.assertEqual(d["reason"], "experiment_assigned")
        # One exposure plus one error for the same decision; presumably this
        # yields an error rate of 1.0, well above the 0.10 threshold.
        process_events_batch(
            [
                {
                    "event_id": "gpi_exp_1",
                    "event_type": "gpi_exposure",
                    "decision_id": d["decision_id"],
                    "subject_id": "user_gp1",
                    "timestamp": now,
                    "properties": {},
                },
                {
                    "event_id": "gpi_err_1",
                    "event_type": "gpi_error",
                    "decision_id": d["decision_id"],
                    "subject_id": "user_gp1",
                    "timestamp": now,
                    "properties": {},
                },
            ]
        )
        triggers = check_experiment_guardrails(self.experiment)
        # assertGreater reports the actual count on failure, unlike
        # assertTrue(len(...) > 0), which only reports "False is not true".
        self.assertGreater(len(triggers), 0)
        self.experiment.refresh_from_db()
        self.assertEqual(self.experiment.status, ExperimentStatus.PAUSED)
        self.assertTrue(
            GuardrailTrigger.objects.filter(
                experiment=self.experiment
            ).exists()
        )

    def test_no_trigger_when_metric_below_threshold(self) -> None:
        """Ten exposures with no error events must leave the experiment running."""
        now = timezone.now().isoformat()
        events = []
        for i in range(10):
            # The cache is cleared before every decision call.
            cache.clear()
            d = decide_for_flag("flag_gpi", f"user_ok_{i}", {})
            events.append(
                {
                    "event_id": f"gpi_ok_exp_{i}",
                    "event_type": "gpi_exposure",
                    "decision_id": d["decision_id"],
                    "subject_id": f"user_ok_{i}",
                    "timestamp": now,
                    "properties": {},
                }
            )
        process_events_batch(events)
        triggers = check_experiment_guardrails(self.experiment)
        self.assertEqual(len(triggers), 0)
        self.experiment.refresh_from_db()
        self.assertEqual(self.experiment.status, ExperimentStatus.RUNNING)
class GuardrailRollbackIntegrationTest(TestCase):
    """Integration tests for a guardrail configured with the ROLLBACK action.

    The fixture is a started experiment (suffix ``_gri``) with the ratio
    metric ``crash_rate_gri`` (``gri_crash`` over ``gri_exposure``) and a
    guardrail on that metric with threshold 0.05.
    """

    @override
    def setUp(self) -> None:
        """Start an experiment with a crash-rate metric and guardrail."""
        cache.clear()
        review_settings_update(
            default_min_approvals=1,
            allow_any_approver=True,
        )
        self.owner = make_experimenter("_gri")
        self.approver = make_approver("_gri")
        self.experiment = _start_experiment(self.owner, self.approver, "_gri")
        self.crash_metric = metric_definition_create(
            key="crash_rate_gri",
            name="Crash Rate",
            metric_type="ratio",
            direction="lower_is_better",
            calculation_rule={
                "numerator_event": "gri_crash",
                "denominator_event": "gri_exposure",
            },
        )
        experiment_metric_add(
            experiment=self.experiment,
            metric=self.crash_metric,
        )
        # The event types named in the calculation rule are registered here,
        # after the metric definition that refers to them by name.
        make_exposure_type(name="gri_exposure")
        make_event_type(name="gri_crash", display_name="Crash")
        # No observation window is passed here, so guardrail_create falls
        # back to whatever default it defines.
        guardrail_create(
            experiment=self.experiment,
            metric=self.crash_metric,
            threshold=Decimal("0.05"),
            action=GuardrailAction.ROLLBACK,
        )

    def test_rollback_completes_experiment_with_control_winner(self) -> None:
        """A breached ROLLBACK guardrail must complete the experiment.

        Verifies that the experiment ends up COMPLETED and that its
        ``ExperimentOutcome`` row records the outcome ``"rollback"``.
        """
        now = timezone.now().isoformat()
        cache.clear()
        d = decide_for_flag("flag_gri", "user_rb1", {})
        # One exposure plus one crash for the same decision; presumably this
        # yields a crash rate of 1.0, well above the 0.05 threshold.
        process_events_batch(
            [
                {
                    "event_id": "gri_exp_1",
                    "event_type": "gri_exposure",
                    "decision_id": d["decision_id"],
                    "subject_id": "user_rb1",
                    "timestamp": now,
                    "properties": {},
                },
                {
                    "event_id": "gri_crash_1",
                    "event_type": "gri_crash",
                    "decision_id": d["decision_id"],
                    "subject_id": "user_rb1",
                    "timestamp": now,
                    "properties": {},
                },
            ]
        )
        triggers = check_experiment_guardrails(self.experiment)
        # assertGreater reports the actual count on failure, unlike
        # assertTrue(len(...) > 0), which only reports "False is not true".
        self.assertGreater(len(triggers), 0)
        self.experiment.refresh_from_db()
        self.assertEqual(self.experiment.status, ExperimentStatus.COMPLETED)
        outcome = ExperimentOutcome.objects.get(experiment=self.experiment)
        self.assertEqual(outcome.outcome, "rollback")
        # NOTE(review): the test name promises a "control winner", but nothing
        # here asserts which variant won -- TODO add that check once the
        # relevant ExperimentOutcome field is confirmed.
class GuardrailCheckAllTest(TestCase):
    """Integration test for ``check_all_running_experiments``.

    The fixture starts two experiments (suffixes ``_gca1`` and ``_gca2``)
    that share the ratio metric ``err_gca`` (``gca_error`` over
    ``gca_exposure``); each gets its own PAUSE guardrail with threshold 0.10.
    """

    @override
    def setUp(self) -> None:
        """Start two experiments guarded by the same error metric."""
        cache.clear()
        review_settings_update(
            default_min_approvals=1,
            allow_any_approver=True,
        )
        self.owner = make_experimenter("_gca")
        self.approver = make_approver("_gca")
        self.exp1 = _start_experiment(self.owner, self.approver, "_gca1")
        self.exp2 = _start_experiment(self.owner, self.approver, "_gca2")
        self.metric = metric_definition_create(
            key="err_gca",
            name="Error",
            metric_type="ratio",
            direction="lower_is_better",
            calculation_rule={
                "numerator_event": "gca_error",
                "denominator_event": "gca_exposure",
            },
        )
        for exp in (self.exp1, self.exp2):
            experiment_metric_add(experiment=exp, metric=self.metric)
            guardrail_create(
                experiment=exp,
                metric=self.metric,
                threshold=Decimal("0.10"),
                action=GuardrailAction.PAUSE,
            )
        # The event types named in the calculation rule are registered here,
        # after the metric definition that refers to them by name.
        make_exposure_type(name="gca_exposure")
        make_event_type(name="gca_error", display_name="Error")

    def test_check_all_processes_multiple_experiments(self) -> None:
        """Both experiments must be checked and at least one guardrail fire."""
        now = timezone.now().isoformat()
        # Only the flag suffix is needed per iteration; presumably
        # ``flag_gca1`` / ``flag_gca2`` are the flags of exp1 / exp2 --
        # verify against make_experiment.
        for suffix in ("gca1", "gca2"):
            cache.clear()
            d = decide_for_flag(f"flag_{suffix}", "user_ca", {})
            process_events_batch(
                [
                    {
                        "event_id": f"{suffix}_exp",
                        "event_type": "gca_exposure",
                        "decision_id": d["decision_id"],
                        "subject_id": "user_ca",
                        "timestamp": now,
                        "properties": {},
                    },
                    {
                        "event_id": f"{suffix}_err",
                        "event_type": "gca_error",
                        "decision_id": d["decision_id"],
                        "subject_id": "user_ca",
                        "timestamp": now,
                        "properties": {},
                    },
                ]
            )
        results = check_all_running_experiments()
        self.assertEqual(results["checked"], 2)
        self.assertGreaterEqual(results["triggered"], 1)