chore(): added async decision persistence

2026-02-24 17:55:14 +03:00
parent 740fd2d7bd
commit cda60bb057
8 changed files with 515 additions and 108 deletions
@@ -12,6 +12,8 @@ DJANGO_STATIC_URL=static/
 REDIS_URI=
 DJANGO_DB_URI=sqlite:///db.sqlite3
 DJANGO_CONN_MAX_AGE=300
 DECISION_RESULT_CACHE_TTL_SECONDS=60
 DECISION_WRITE_MODE=sync
 DJANGO_SILKY_ENABLED=False
 DJANGO_SILKY_PYTHON_PROFILER=False
@@ -54,6 +54,18 @@ class ChannelAPITest(TestCase):
        self.assertEqual(resp.status_code, 201)
        self.assertEqual(resp.json()["channel_type"], ChannelType.SMTP)
    def test_create_telegram_channel_invalid_config(self) -> None:
        """A config holding only ``bot_token`` is rejected with HTTP 422."""
        incomplete_config = {"bot_token": "tok"}
        response = self._create_channel(config=incomplete_config)
        self.assertEqual(response.status_code, 422)
    def test_create_smtp_channel_invalid_config(self) -> None:
        """An SMTP channel created with an empty config yields HTTP 422."""
        request_kwargs = {
            "channel_type": ChannelType.SMTP,
            "name": "Email",
            "config": {},
        }
        response = self._create_channel(**request_kwargs)
        self.assertEqual(response.status_code, 422)
    def test_list_channels(self) -> None:
        self._create_channel()
        resp = self.client.get(
@@ -92,6 +104,17 @@ class ChannelAPITest(TestCase):
        self.assertEqual(resp.status_code, 200)
        self.assertEqual(resp.json()["name"], "Updated")
    def test_update_channel_invalid_config(self) -> None:
        """Patching a channel with a ``bot_token``-only config yields 422."""
        channel_id = self._create_channel().json()["id"]
        url = reverse("api-1:update_channel", args=[channel_id])
        body = json.dumps({"config": {"bot_token": "tok"}})
        response = self.client.patch(
            url,
            data=body,
            content_type="application/json",
            HTTP_AUTHORIZATION=self.auth,
        )
        self.assertEqual(response.status_code, 422)
    def test_delete_channel(self) -> None:
        create_resp = self._create_channel()
        ch_id = create_resp.json()["id"]
@@ -1,9 +1,11 @@
 import hashlib
 import json
 import logging
 import uuid
 from datetime import timedelta
 from decimal import Decimal
 from django.conf import settings
 from django.core.cache import cache
 from django.utils import timezone
 from prometheus_client import Counter
@@ -12,6 +14,7 @@ from apps.conflicts.models import ExperimentConflictDomain
 from apps.conflicts.services import resolve_domain_conflict
 from apps.events.models import Decision
 from apps.events.services import decision_create
 from apps.events.tasks import persist_decision_task
 from apps.experiments.models import (
    ACTIVE_STATUSES,
    Experiment,
@@ -36,6 +39,25 @@ FLAG_CACHE_TTL = 300
 EXPERIMENT_CACHE_TTL = 60
 MAX_CONCURRENT_EXPERIMENTS = 3
 COOLDOWN_DAYS = 7
 DECISION_WRITE_MODE_SYNC = "sync"
 DECISION_WRITE_MODE_ASYNC = "async"
 DECISION_WRITE_MODE_DISABLED = "disabled"
 DECISION_WRITE_MODE_VALUES = {
    DECISION_WRITE_MODE_SYNC,
    DECISION_WRITE_MODE_ASYNC,
    DECISION_WRITE_MODE_DISABLED,
 }
 DECISION_FORCE_SYNC_REASON = "experiment_assigned"
 DECISION_CACHEABLE_REASONS = frozenset(
    {
        "flag_not_found",
        "no_active_experiment",
        "targeting_mismatch",
        "outside_traffic_allocation",
        "no_variants",
        "experiment_assigned",
    }
 )
 def _hash_subject(subject_id: str, experiment_id: str, salt: str) -> Decimal:
@@ -56,16 +78,156 @@ def _select_variant(
    return variants[-1] if variants else None
-def _persist_decision(result: dict, subject_id: str) -> None:
+def _decision_result_cache_ttl() -> int:
-    decision_create(
+    ttl = int(
-        decision_id=result["decision_id"],
+        getattr(settings, "DECISION_RESULT_CACHE_TTL_SECONDS", 60),
        flag_key=result["flag"],
        subject_id=subject_id,
        experiment_id=result.get("experiment_id"),
        variant_id=result.get("variant_id"),
        value=str(result["value"]) if result["value"] is not None else "",
        reason=result["reason"],
    )
    return max(ttl, 0)
 def _decision_write_mode() -> str:
    """Return the decision write mode configured in Django settings.

    The ``DECISION_WRITE_MODE`` setting is stringified and lower-cased.
    When it is absent, or is not one of ``DECISION_WRITE_MODE_VALUES``,
    the synchronous mode is returned instead.
    """
    configured = getattr(
        settings,
        "DECISION_WRITE_MODE",
        DECISION_WRITE_MODE_SYNC,
    )
    normalized = str(configured).lower()
    if normalized not in DECISION_WRITE_MODE_VALUES:
        # Unrecognised values fall back to the sync mode.
        return DECISION_WRITE_MODE_SYNC
    return normalized
 def _subject_attributes_digest(subject_attributes: dict) -> str:
    encoded = json.dumps(
        subject_attributes,
        sort_keys=True,
        separators=(",", ":"),
        default=str,
    ).encode()
    return hashlib.sha256(encoded).hexdigest()
 def _decision_cache_key(
    *,
    flag_key: str,
    subject_id: str,
    subject_attributes_digest: str,
    flag: FeatureFlag | None,
    experiment: Experiment | None,
 ) -> str:
    """Compose the cache key under which a decision result is stored.

    The key is ``decide_result`` followed by colon-separated segments:
    flag key, subject id, attributes digest, the flag's ``updated_at``
    (``"missing"`` without a flag), and the experiment's pk, status,
    version and ``updated_at`` (each ``"none"`` without an experiment).
    """
    if flag:
        flag_revision = flag.updated_at.isoformat()
    else:
        flag_revision = "missing"

    if experiment:
        experiment_segments = (
            str(experiment.pk),
            experiment.status,
            str(experiment.version),
            experiment.updated_at.isoformat(),
        )
    else:
        experiment_segments = ("none",) * 4
    exp_id, exp_status, exp_version, exp_revision = experiment_segments

    return (
        f"decide_result:{flag_key}:{subject_id}:"
        f"{subject_attributes_digest}:{flag_revision}:"
        f"{exp_id}:{exp_status}:{exp_version}:{exp_revision}"
    )
 def _decision_template(result: dict) -> dict:
    return {
        "flag": result["flag"],
        "value": result["value"],
        "experiment_id": result.get("experiment_id"),
        "variant_id": result.get("variant_id"),
        "reason": result["reason"],
    }
 def _decision_from_template(template: dict) -> dict:
    return {
        "flag": template["flag"],
        "value": template["value"],
        "decision_id": str(uuid.uuid4()),
        "experiment_id": template.get("experiment_id"),
        "variant_id": template.get("variant_id"),
        "reason": template["reason"],
    }
 def _cache_get_decision(cache_key: str) -> dict | None:
    """Return a decision rebuilt from the cached template, or ``None``.

    ``None`` is returned when nothing is cached under ``cache_key``, when
    the cached value is not a dict, or when it lacks any of the ``flag``,
    ``value`` and ``reason`` keys.
    """
    template = cache.get(cache_key)
    if isinstance(template, dict) and all(
        field in template for field in ("flag", "value", "reason")
    ):
        return _decision_from_template(template)
    return None
 def _cache_set_decision(cache_key: str, result: dict) -> None:
    """Cache the reusable part of a decision under ``cache_key``.

    Nothing is stored when the result's reason is not listed in
    ``DECISION_CACHEABLE_REASONS`` or when the configured TTL is ``0``.

    The entry is written with ``cache.add`` so an existing entry is left
    untouched. ``_finalize_result`` calls this function for cache hits as
    well; overwriting with ``cache.set`` there would restart the TTL on
    every hit, so an entry for a frequently seen subject would never
    expire.
    """
    if result["reason"] not in DECISION_CACHEABLE_REASONS:
        return
    ttl = _decision_result_cache_ttl()
    if ttl == 0:
        return
    # add() is a no-op when the key already exists, keeping its original TTL.
    cache.add(cache_key, _decision_template(result), ttl)
 def _build_result(
    *,
    flag_key: str,
    value: str | int | float | bool | None,
    reason: str,
    experiment_id: str | None = None,
    variant_id: str | None = None,
 ) -> dict:
    return {
        "flag": flag_key,
        "value": value,
        "decision_id": str(uuid.uuid4()),
        "experiment_id": experiment_id,
        "variant_id": variant_id,
        "reason": reason,
    }
 def _decision_payload(result: dict, subject_id: str) -> dict:
    return {
        "decision_id": result["decision_id"],
        "flag_key": result["flag"],
        "subject_id": subject_id,
        "experiment_id": result.get("experiment_id"),
        "variant_id": result.get("variant_id"),
        "value": str(result["value"])
        if result["value"] is not None
        else "",
        "reason": result["reason"],
    }
 def _persist_decision(result: dict, subject_id: str) -> None:
    """Persist a decision according to the configured write mode.

    Results whose reason equals ``DECISION_FORCE_SYNC_REASON`` are always
    written inline. Otherwise the ``disabled`` mode skips the write, the
    ``async`` mode enqueues ``persist_decision_task``, and the ``sync`` mode
    writes inline. If enqueueing raises, the failure is logged and the
    decision is written inline instead.
    """
    payload = _decision_payload(result, subject_id)
    mode = _decision_write_mode()

    if result["reason"] == DECISION_FORCE_SYNC_REASON:
        decision_create(**payload)
        return

    if mode == DECISION_WRITE_MODE_DISABLED:
        return

    if mode == DECISION_WRITE_MODE_ASYNC:
        try:
            persist_decision_task.delay(**payload)
        except Exception:
            # Fall through to the inline write below.
            logger.exception(
                "decision_async_persist_failed",
                extra={"reason": result["reason"]},
            )
        else:
            return

    decision_create(**payload)
 def _cached_flag_get(flag_key: str) -> FeatureFlag | None:
@@ -171,77 +333,107 @@ def _check_domain_conflicts(
    return True
 def _finalize_result(
    *,
    result: dict,
    subject_id: str,
    cache_key: str,
 ) -> dict:
    """Count, cache and persist a decision, then return it unchanged.

    In order: increments the ``DECIDE_REQUESTS`` counter labelled with the
    result's reason, offers the result to the decision cache under
    ``cache_key``, and persists it for ``subject_id``.
    """
    reason_counter = DECIDE_REQUESTS.labels(reason=result["reason"])
    reason_counter.inc()
    _cache_set_decision(cache_key, result)
    _persist_decision(result, subject_id)
    return result
 def decide_for_flag(
    flag_key: str,
    subject_id: str,
    subject_attributes: dict,
 ) -> dict:
-    flag = _cached_flag_get(flag_key)
+    subject_attributes_digest = _subject_attributes_digest(
-    if not flag:
+        subject_attributes,
-        DECIDE_REQUESTS.labels(reason="flag_not_found").inc()
+    )
-        result = {
+
-            "flag": flag_key,
+    flag = _cached_flag_get(flag_key)
-            "value": None,
+    experiment = _cached_active_experiment(flag.pk) if flag else None
-            "decision_id": str(uuid.uuid4()),
+    cache_key = _decision_cache_key(
-            "experiment_id": None,
+        flag_key=flag_key,
-            "variant_id": None,
+        subject_id=subject_id,
-            "reason": "flag_not_found",
+        subject_attributes_digest=subject_attributes_digest,
-        }
+        flag=flag,
-        _persist_decision(result, subject_id)
+        experiment=experiment,
-        return result
+    )
    cached_result = _cache_get_decision(cache_key)
    if cached_result is not None:
        return _finalize_result(
            result=cached_result,
            subject_id=subject_id,
            cache_key=cache_key,
        )
    if not flag:
        result = _build_result(
            flag_key=flag_key,
            value=None,
            reason="flag_not_found",
        )
        return _finalize_result(
            result=result,
            subject_id=subject_id,
            cache_key=cache_key,
        )
    experiment = _cached_active_experiment(flag.pk)
    if not experiment or experiment.status != ExperimentStatus.RUNNING:
-        DECIDE_REQUESTS.labels(reason="no_active_experiment").inc()
+        result = _build_result(
-        result = {
+            flag_key=flag_key,
-            "flag": flag_key,
+            value=flag.default_value,
-            "value": flag.default_value,
+            reason="no_active_experiment",
-            "decision_id": str(uuid.uuid4()),
+        )
-            "experiment_id": None,
+        return _finalize_result(
-            "variant_id": None,
+            result=result,
-            "reason": "no_active_experiment",
+            subject_id=subject_id,
-        }
+            cache_key=cache_key,
-        _persist_decision(result, subject_id)
+        )
        return result
    if not _check_targeting(experiment.targeting_rules, subject_attributes):
-        DECIDE_REQUESTS.labels(reason="targeting_mismatch").inc()
+        result = _build_result(
-        result = {
+            flag_key=flag_key,
-            "flag": flag_key,
+            value=flag.default_value,
-            "value": flag.default_value,
+            reason="targeting_mismatch",
-            "decision_id": str(uuid.uuid4()),
+            experiment_id=str(experiment.pk),
-            "experiment_id": str(experiment.pk),
+        )
-            "variant_id": None,
+        return _finalize_result(
-            "reason": "targeting_mismatch",
+            result=result,
-        }
+            subject_id=subject_id,
-        _persist_decision(result, subject_id)
+            cache_key=cache_key,
-        return result
+        )
    if not _check_participation_limits(subject_id, experiment.pk):
-        DECIDE_REQUESTS.labels(reason="participation_limit").inc()
+        result = _build_result(
-        result = {
+            flag_key=flag_key,
-            "flag": flag_key,
+            value=flag.default_value,
-            "value": flag.default_value,
+            reason="participation_limit",
-            "decision_id": str(uuid.uuid4()),
+            experiment_id=str(experiment.pk),
-            "experiment_id": str(experiment.pk),
+        )
-            "variant_id": None,
+        return _finalize_result(
-            "reason": "participation_limit",
+            result=result,
-        }
+            subject_id=subject_id,
-        _persist_decision(result, subject_id)
+            cache_key=cache_key,
-        return result
+        )
    if not _check_domain_conflicts(experiment, subject_id):
-        DECIDE_REQUESTS.labels(reason="domain_conflict").inc()
+        result = _build_result(
-        result = {
+            flag_key=flag_key,
-            "flag": flag_key,
+            value=flag.default_value,
-            "value": flag.default_value,
+            reason="domain_conflict",
-            "decision_id": str(uuid.uuid4()),
+            experiment_id=str(experiment.pk),
-            "experiment_id": str(experiment.pk),
+        )
-            "variant_id": None,
+        return _finalize_result(
-            "reason": "domain_conflict",
+            result=result,
-        }
+            subject_id=subject_id,
-        _persist_decision(result, subject_id)
+            cache_key=cache_key,
-        return result
+        )
    allocation_hash = _hash_subject(
        subject_id,
@@ -249,31 +441,31 @@ def decide_for_flag(
        "allocation",
    )
    if allocation_hash >= experiment.traffic_allocation:
-        DECIDE_REQUESTS.labels(reason="outside_traffic_allocation").inc()
+        result = _build_result(
-        result = {
+            flag_key=flag_key,
-            "flag": flag_key,
+            value=flag.default_value,
-            "value": flag.default_value,
+            reason="outside_traffic_allocation",
-            "decision_id": str(uuid.uuid4()),
+            experiment_id=str(experiment.pk),
-            "experiment_id": str(experiment.pk),
+        )
-            "variant_id": None,
+        return _finalize_result(
-            "reason": "outside_traffic_allocation",
+            result=result,
-        }
+            subject_id=subject_id,
-        _persist_decision(result, subject_id)
+            cache_key=cache_key,
-        return result
+        )
    variants = list(experiment.variants.all())
    if not variants:
-        DECIDE_REQUESTS.labels(reason="no_variants").inc()
+        result = _build_result(
-        result = {
+            flag_key=flag_key,
-            "flag": flag_key,
+            value=flag.default_value,
-            "value": flag.default_value,
+            reason="no_variants",
-            "decision_id": str(uuid.uuid4()),
+            experiment_id=str(experiment.pk),
-            "experiment_id": str(experiment.pk),
+        )
-            "variant_id": None,
+        return _finalize_result(
-            "reason": "no_variants",
+            result=result,
-        }
+            subject_id=subject_id,
-        _persist_decision(result, subject_id)
+            cache_key=cache_key,
-        return result
+        )
    variant_hash = _hash_subject(
        subject_id,
@@ -284,14 +476,15 @@ def decide_for_flag(
    normalized_hash = variant_hash * total_weight / Decimal(100)
    selected = _select_variant(variants, normalized_hash)
-    DECIDE_REQUESTS.labels(reason="experiment_assigned").inc()
+    result = _build_result(
-    result = {
+        flag_key=flag_key,
-        "flag": flag_key,
+        value=selected.value if selected else flag.default_value,
-        "value": selected.value if selected else flag.default_value,
+        reason="experiment_assigned",
-        "decision_id": str(uuid.uuid4()),
+        experiment_id=str(experiment.pk),
-        "experiment_id": str(experiment.pk),
+        variant_id=str(selected.pk) if selected else None,
-        "variant_id": str(selected.pk) if selected else None,
+    )
-        "reason": "experiment_assigned",
+    return _finalize_result(
-    }
+        result=result,
-    _persist_decision(result, subject_id)
+        subject_id=subject_id,
-    return result
+        cache_key=cache_key,
    )
@@ -1,8 +1,9 @@
 from decimal import Decimal
 from typing import override
 from unittest.mock import patch
 from django.core.cache import cache
-from django.test import TestCase
+from django.test import TestCase, override_settings
 from apps.decision.services import (
    MAX_CONCURRENT_EXPERIMENTS,
@@ -10,6 +11,7 @@ from apps.decision.services import (
    _select_variant,
    decide_for_flag,
 )
 from apps.events.models import Decision
 from apps.experiments.models import Experiment, ExperimentStatus
 from apps.experiments.services import variant_create
 from apps.experiments.tests.helpers import make_experiment, make_flag
@@ -37,11 +39,12 @@ class HashSubjectTest(TestCase):
 class SelectVariantTest(TestCase):
    def test_selects_by_weight(self) -> None:
        class FV:
-            def __init__(self, n, w):
+            def __init__(self, n, w, i):
                self.name = n
                self.weight = Decimal(str(w))
                self.id = i
-        variants = [FV("a", 50), FV("b", 50)]
+        variants = [FV("a", 50, "uuid-a"), FV("b", 50, "uuid-b")]
        self.assertEqual(_select_variant(variants, Decimal(10)).name, "a")
        self.assertEqual(_select_variant(variants, Decimal(60)).name, "b")
@@ -370,3 +373,146 @@ class PartialTrafficVariantDistributionTest(TestCase):
        ratio = counts["ctrl"] / total
        self.assertGreater(ratio, 0.3)
        self.assertLess(ratio, 0.7)
 class DecisionCachingTest(TestCase):
    """Checks that ``decide_for_flag`` reuses a cached decision result."""

    @override
    def setUp(self) -> None:
        """Create a running experiment with two 50/50 variants."""
        # Start from an empty cache so earlier tests cannot leak entries.
        cache.clear()
        self.owner = make_user(
            username="cache_owner",
            email="cache_owner@lotty.local",
        )
        self.flag = make_flag(suffix="_cache", default="default_val")
        self.experiment = make_experiment(
            flag=self.flag,
            owner=self.owner,
            suffix="_cache",
            traffic_allocation=Decimal("100.00"),
        )
        variant_create(
            experiment=self.experiment,
            user=self.owner,
            name="control",
            value="ctrl",
            weight=Decimal("50.00"),
            is_control=True,
        )
        variant_create(
            experiment=self.experiment,
            user=self.owner,
            name="treatment",
            value="treat",
            weight=Decimal("50.00"),
        )
        # The status is set with a direct queryset update.
        Experiment.objects.filter(pk=self.experiment.pk).update(
            status=ExperimentStatus.RUNNING,
        )
    def test_cached_result_skips_recalculation(self) -> None:
        """A repeated identical call must not reach ``_check_targeting``."""
        first = decide_for_flag(
            self.flag.key,
            "cached_subject",
            {"country": "US"},
        )
        # Any call to _check_targeting during the second decision raises,
        # which would fail the test.
        with patch(
            "apps.decision.services._check_targeting",
            side_effect=AssertionError("cache miss"),
        ):
            second = decide_for_flag(
                self.flag.key,
                "cached_subject",
                {"country": "US"},
            )
        self.assertEqual(first["reason"], "experiment_assigned")
        self.assertEqual(second["reason"], "experiment_assigned")
        self.assertEqual(first["value"], second["value"])
        # Each call still gets its own decision id.
        self.assertNotEqual(first["decision_id"], second["decision_id"])
 class DecisionPersistenceModeTest(TestCase):
    """Checks how ``DECISION_WRITE_MODE`` affects decision persistence."""

    @override
    def setUp(self) -> None:
        """Create an owner and a flag; no experiment exists by default."""
        cache.clear()
        self.owner = make_user(
            username="persist_owner",
            email="persist_owner@lotty.local",
        )
        self.flag = make_flag(suffix="_persist", default="default_val")
    def _make_running_experiment(self):
        """Create and return a running experiment with two 50/50 variants."""
        experiment = make_experiment(
            flag=self.flag,
            owner=self.owner,
            suffix="_persist",
            traffic_allocation=Decimal("100.00"),
        )
        variant_create(
            experiment=experiment,
            user=self.owner,
            name="control",
            value="ctrl",
            weight=Decimal("50.00"),
            is_control=True,
        )
        variant_create(
            experiment=experiment,
            user=self.owner,
            name="treatment",
            value="treat",
            weight=Decimal("50.00"),
        )
        # The status is set with a direct queryset update.
        Experiment.objects.filter(pk=experiment.pk).update(
            status=ExperimentStatus.RUNNING,
        )
        return experiment
    @override_settings(DECISION_WRITE_MODE="async")
    def test_async_mode_enqueues_non_assigned_result(self) -> None:
        """In async mode a non-assigned result is enqueued, not written."""
        with patch(
            "apps.decision.services.persist_decision_task.delay"
        ) as delay_mock, patch(
            "apps.decision.services.decision_create"
        ) as create_mock:
            result = decide_for_flag(self.flag.key, "subj_async", {})
        self.assertEqual(result["reason"], "no_active_experiment")
        delay_mock.assert_called_once()
        create_mock.assert_not_called()
    @override_settings(DECISION_WRITE_MODE="async")
    def test_async_mode_keeps_assigned_result_sync(self) -> None:
        """In async mode an assigned result is still written inline."""
        self._make_running_experiment()
        with patch(
            "apps.decision.services.persist_decision_task.delay"
        ) as delay_mock, patch(
            "apps.decision.services.decision_create"
        ) as create_mock:
            result = decide_for_flag(
                self.flag.key,
                "subj_async_assigned",
                {},
            )
        self.assertEqual(result["reason"], "experiment_assigned")
        delay_mock.assert_not_called()
        create_mock.assert_called_once()
    @override_settings(DECISION_WRITE_MODE="disabled")
    def test_disabled_mode_skips_non_assigned_persist(self) -> None:
        """In disabled mode a non-assigned result adds no Decision row."""
        before = Decision.objects.count()
        result = decide_for_flag(self.flag.key, "subj_disabled", {})
        after = Decision.objects.count()
        self.assertEqual(result["reason"], "no_active_experiment")
        self.assertEqual(before, after)
    @override_settings(DECISION_WRITE_MODE="disabled")
    def test_disabled_mode_keeps_assigned_persist(self) -> None:
        """In disabled mode an assigned result still adds one Decision row."""
        self._make_running_experiment()
        before = Decision.objects.count()
        result = decide_for_flag(
            self.flag.key,
            "subj_disabled_assigned",
            {},
        )
        after = Decision.objects.count()
        self.assertEqual(result["reason"], "experiment_assigned")
        self.assertEqual(after, before + 1)
@@ -1,7 +1,11 @@
 import logging
-from apps.events.services import cleanup_expired_pending_events
+from apps.events.services import (
    cleanup_expired_pending_events,
    decision_create,
 )
 from config.celery import app
 from config.errors import ConflictError
 logger = logging.getLogger("lotty")
@@ -14,3 +18,35 @@ def cleanup_expired_pending_events_task(self):
        extra={"deleted": deleted},
    )
    return deleted
@app.task(bind=True, name="events.persist_decision")
def persist_decision_task(
    self,
    *,
    decision_id: str,
    flag_key: str,
    subject_id: str,
    experiment_id: str | None = None,
    variant_id: str | None = None,
    value: str,
    reason: str,
):
    """Persist one decision record from a Celery worker.

    All arguments are forwarded to ``decision_create``. Returns a dict with
    the ``decision_id`` and a ``status`` of ``"created"``, or ``"conflict"``
    when ``decision_create`` raises ``ConflictError`` (which is logged and
    not re-raised).
    """
    fields = {
        "decision_id": decision_id,
        "flag_key": flag_key,
        "subject_id": subject_id,
        "experiment_id": experiment_id,
        "variant_id": variant_id,
        "value": value,
        "reason": reason,
    }
    status = "created"
    try:
        decision_create(**fields)
    except ConflictError:
        logger.info(
            "decision_persist_skipped_conflict",
            extra={"decision_id": decision_id},
        )
        status = "conflict"
    return {"status": status, "decision_id": decision_id}
@@ -589,9 +589,7 @@ class GuardrailHigherIsBetterTest(TestCase):
                self.v_treatment,
            )
        for i in range(3):
-            self._send_purchase(
+            self._send_purchase(f"pur_hib_ok_{i}", f"dec_hib_ok_{i}", f"u{i}")
                f"pur_hib_ok_{i}", f"dec_hib_ok_{i}", f"u{i}"
            )
        triggers = check_experiment_guardrails(self.experiment)
@@ -1,4 +1,3 @@
 from django.core.exceptions import ValidationError
 from django.test import TestCase
@@ -54,6 +54,16 @@ else:
        },
    }
 # Lifetime, in seconds, of cached decision results; read from the
 # environment with a default of 60.
 DECISION_RESULT_CACHE_TTL_SECONDS = env.int(
    "DECISION_RESULT_CACHE_TTL_SECONDS",
    default=60,
 )
 # How decisions are written; the decision service recognises the modes
 # "sync", "async" and "disabled". Defaults to "sync".
 DECISION_WRITE_MODE = env(
    "DECISION_WRITE_MODE",
    default="sync",
 )
 # Celery
`@@ -1,4 +1,3 @@`

	`from django.core.exceptions import ValidationError`	`from django.core.exceptions import ValidationError`
	`from django.test import TestCase`	`from django.test import TestCase`