chore(): added async decision persistence

This commit is contained in:
ITQ
2026-02-24 17:55:14 +03:00
parent 740fd2d7bd
commit cda60bb057
8 changed files with 515 additions and 108 deletions
+2
View File
@@ -12,6 +12,8 @@ DJANGO_STATIC_URL=static/
REDIS_URI=
DJANGO_DB_URI=sqlite:///db.sqlite3
DJANGO_CONN_MAX_AGE=300
DECISION_RESULT_CACHE_TTL_SECONDS=60
DECISION_WRITE_MODE=sync
DJANGO_SILKY_ENABLED=False
DJANGO_SILKY_PYTHON_PROFILER=False
@@ -54,6 +54,18 @@ class ChannelAPITest(TestCase):
self.assertEqual(resp.status_code, 201)
self.assertEqual(resp.json()["channel_type"], ChannelType.SMTP)
def test_create_telegram_channel_invalid_config(self) -> None:
resp = self._create_channel(config={"bot_token": "tok"})
self.assertEqual(resp.status_code, 422)
def test_create_smtp_channel_invalid_config(self) -> None:
resp = self._create_channel(
channel_type=ChannelType.SMTP,
name="Email",
config={},
)
self.assertEqual(resp.status_code, 422)
def test_list_channels(self) -> None:
self._create_channel()
resp = self.client.get(
@@ -92,6 +104,17 @@ class ChannelAPITest(TestCase):
self.assertEqual(resp.status_code, 200)
self.assertEqual(resp.json()["name"], "Updated")
def test_update_channel_invalid_config(self) -> None:
create_resp = self._create_channel()
ch_id = create_resp.json()["id"]
resp = self.client.patch(
reverse("api-1:update_channel", args=[ch_id]),
data=json.dumps({"config": {"bot_token": "tok"}}),
content_type="application/json",
HTTP_AUTHORIZATION=self.auth,
)
self.assertEqual(resp.status_code, 422)
def test_delete_channel(self) -> None:
create_resp = self._create_channel()
ch_id = create_resp.json()["id"]
+293 -100
View File
@@ -1,9 +1,11 @@
import hashlib
import json
import logging
import uuid
from datetime import timedelta
from decimal import Decimal
from django.conf import settings
from django.core.cache import cache
from django.utils import timezone
from prometheus_client import Counter
@@ -12,6 +14,7 @@ from apps.conflicts.models import ExperimentConflictDomain
from apps.conflicts.services import resolve_domain_conflict
from apps.events.models import Decision
from apps.events.services import decision_create
from apps.events.tasks import persist_decision_task
from apps.experiments.models import (
ACTIVE_STATUSES,
Experiment,
@@ -36,6 +39,25 @@ FLAG_CACHE_TTL = 300
EXPERIMENT_CACHE_TTL = 60
MAX_CONCURRENT_EXPERIMENTS = 3
COOLDOWN_DAYS = 7
DECISION_WRITE_MODE_SYNC = "sync"
DECISION_WRITE_MODE_ASYNC = "async"
DECISION_WRITE_MODE_DISABLED = "disabled"
DECISION_WRITE_MODE_VALUES = {
DECISION_WRITE_MODE_SYNC,
DECISION_WRITE_MODE_ASYNC,
DECISION_WRITE_MODE_DISABLED,
}
DECISION_FORCE_SYNC_REASON = "experiment_assigned"
DECISION_CACHEABLE_REASONS = frozenset(
{
"flag_not_found",
"no_active_experiment",
"targeting_mismatch",
"outside_traffic_allocation",
"no_variants",
"experiment_assigned",
}
)
def _hash_subject(subject_id: str, experiment_id: str, salt: str) -> Decimal:
@@ -56,16 +78,156 @@ def _select_variant(
return variants[-1] if variants else None
def _persist_decision(result: dict, subject_id: str) -> None:
decision_create(
decision_id=result["decision_id"],
flag_key=result["flag"],
subject_id=subject_id,
experiment_id=result.get("experiment_id"),
variant_id=result.get("variant_id"),
value=str(result["value"]) if result["value"] is not None else "",
reason=result["reason"],
def _decision_result_cache_ttl() -> int:
ttl = int(
getattr(settings, "DECISION_RESULT_CACHE_TTL_SECONDS", 60),
)
return max(ttl, 0)
def _decision_write_mode() -> str:
mode = str(
getattr(
settings,
"DECISION_WRITE_MODE",
DECISION_WRITE_MODE_SYNC,
)
).lower()
if mode in DECISION_WRITE_MODE_VALUES:
return mode
return DECISION_WRITE_MODE_SYNC
def _subject_attributes_digest(subject_attributes: dict) -> str:
encoded = json.dumps(
subject_attributes,
sort_keys=True,
separators=(",", ":"),
default=str,
).encode()
return hashlib.sha256(encoded).hexdigest()
def _decision_cache_key(
*,
flag_key: str,
subject_id: str,
subject_attributes_digest: str,
flag: FeatureFlag | None,
experiment: Experiment | None,
) -> str:
flag_revision = flag.updated_at.isoformat() if flag else "missing"
experiment_id = str(experiment.pk) if experiment else "none"
experiment_status = experiment.status if experiment else "none"
experiment_version = str(experiment.version) if experiment else "none"
experiment_revision = (
experiment.updated_at.isoformat() if experiment else "none"
)
return (
"decide_result:"
f"{flag_key}:"
f"{subject_id}:"
f"{subject_attributes_digest}:"
f"{flag_revision}:"
f"{experiment_id}:"
f"{experiment_status}:"
f"{experiment_version}:"
f"{experiment_revision}"
)
def _decision_template(result: dict) -> dict:
return {
"flag": result["flag"],
"value": result["value"],
"experiment_id": result.get("experiment_id"),
"variant_id": result.get("variant_id"),
"reason": result["reason"],
}
def _decision_from_template(template: dict) -> dict:
return {
"flag": template["flag"],
"value": template["value"],
"decision_id": str(uuid.uuid4()),
"experiment_id": template.get("experiment_id"),
"variant_id": template.get("variant_id"),
"reason": template["reason"],
}
def _cache_get_decision(cache_key: str) -> dict | None:
cached = cache.get(cache_key)
if not isinstance(cached, dict):
return None
required = {"flag", "value", "reason"}
if not required.issubset(cached.keys()):
return None
return _decision_from_template(cached)
def _cache_set_decision(cache_key: str, result: dict) -> None:
if result["reason"] not in DECISION_CACHEABLE_REASONS:
return
ttl = _decision_result_cache_ttl()
if ttl == 0:
return
cache.set(cache_key, _decision_template(result), ttl)
def _build_result(
*,
flag_key: str,
value: str | int | float | bool | None,
reason: str,
experiment_id: str | None = None,
variant_id: str | None = None,
) -> dict:
return {
"flag": flag_key,
"value": value,
"decision_id": str(uuid.uuid4()),
"experiment_id": experiment_id,
"variant_id": variant_id,
"reason": reason,
}
def _decision_payload(result: dict, subject_id: str) -> dict:
return {
"decision_id": result["decision_id"],
"flag_key": result["flag"],
"subject_id": subject_id,
"experiment_id": result.get("experiment_id"),
"variant_id": result.get("variant_id"),
"value": str(result["value"])
if result["value"] is not None
else "",
"reason": result["reason"],
}
def _persist_decision(result: dict, subject_id: str) -> None:
payload = _decision_payload(result, subject_id)
mode = _decision_write_mode()
is_force_sync = result["reason"] == DECISION_FORCE_SYNC_REASON
if mode == DECISION_WRITE_MODE_DISABLED and not is_force_sync:
return
if mode == DECISION_WRITE_MODE_ASYNC and not is_force_sync:
try:
persist_decision_task.delay(**payload)
except Exception:
logger.exception(
"decision_async_persist_failed",
extra={"reason": result["reason"]},
)
else:
return
decision_create(**payload)
def _cached_flag_get(flag_key: str) -> FeatureFlag | None:
@@ -171,77 +333,107 @@ def _check_domain_conflicts(
return True
def _finalize_result(
*,
result: dict,
subject_id: str,
cache_key: str,
) -> dict:
DECIDE_REQUESTS.labels(reason=result["reason"]).inc()
_cache_set_decision(cache_key, result)
_persist_decision(result, subject_id)
return result
def decide_for_flag(
flag_key: str,
subject_id: str,
subject_attributes: dict,
) -> dict:
flag = _cached_flag_get(flag_key)
if not flag:
DECIDE_REQUESTS.labels(reason="flag_not_found").inc()
result = {
"flag": flag_key,
"value": None,
"decision_id": str(uuid.uuid4()),
"experiment_id": None,
"variant_id": None,
"reason": "flag_not_found",
}
_persist_decision(result, subject_id)
return result
subject_attributes_digest = _subject_attributes_digest(
subject_attributes,
)
flag = _cached_flag_get(flag_key)
experiment = _cached_active_experiment(flag.pk) if flag else None
cache_key = _decision_cache_key(
flag_key=flag_key,
subject_id=subject_id,
subject_attributes_digest=subject_attributes_digest,
flag=flag,
experiment=experiment,
)
cached_result = _cache_get_decision(cache_key)
if cached_result is not None:
return _finalize_result(
result=cached_result,
subject_id=subject_id,
cache_key=cache_key,
)
if not flag:
result = _build_result(
flag_key=flag_key,
value=None,
reason="flag_not_found",
)
return _finalize_result(
result=result,
subject_id=subject_id,
cache_key=cache_key,
)
experiment = _cached_active_experiment(flag.pk)
if not experiment or experiment.status != ExperimentStatus.RUNNING:
DECIDE_REQUESTS.labels(reason="no_active_experiment").inc()
result = {
"flag": flag_key,
"value": flag.default_value,
"decision_id": str(uuid.uuid4()),
"experiment_id": None,
"variant_id": None,
"reason": "no_active_experiment",
}
_persist_decision(result, subject_id)
return result
result = _build_result(
flag_key=flag_key,
value=flag.default_value,
reason="no_active_experiment",
)
return _finalize_result(
result=result,
subject_id=subject_id,
cache_key=cache_key,
)
if not _check_targeting(experiment.targeting_rules, subject_attributes):
DECIDE_REQUESTS.labels(reason="targeting_mismatch").inc()
result = {
"flag": flag_key,
"value": flag.default_value,
"decision_id": str(uuid.uuid4()),
"experiment_id": str(experiment.pk),
"variant_id": None,
"reason": "targeting_mismatch",
}
_persist_decision(result, subject_id)
return result
result = _build_result(
flag_key=flag_key,
value=flag.default_value,
reason="targeting_mismatch",
experiment_id=str(experiment.pk),
)
return _finalize_result(
result=result,
subject_id=subject_id,
cache_key=cache_key,
)
if not _check_participation_limits(subject_id, experiment.pk):
DECIDE_REQUESTS.labels(reason="participation_limit").inc()
result = {
"flag": flag_key,
"value": flag.default_value,
"decision_id": str(uuid.uuid4()),
"experiment_id": str(experiment.pk),
"variant_id": None,
"reason": "participation_limit",
}
_persist_decision(result, subject_id)
return result
result = _build_result(
flag_key=flag_key,
value=flag.default_value,
reason="participation_limit",
experiment_id=str(experiment.pk),
)
return _finalize_result(
result=result,
subject_id=subject_id,
cache_key=cache_key,
)
if not _check_domain_conflicts(experiment, subject_id):
DECIDE_REQUESTS.labels(reason="domain_conflict").inc()
result = {
"flag": flag_key,
"value": flag.default_value,
"decision_id": str(uuid.uuid4()),
"experiment_id": str(experiment.pk),
"variant_id": None,
"reason": "domain_conflict",
}
_persist_decision(result, subject_id)
return result
result = _build_result(
flag_key=flag_key,
value=flag.default_value,
reason="domain_conflict",
experiment_id=str(experiment.pk),
)
return _finalize_result(
result=result,
subject_id=subject_id,
cache_key=cache_key,
)
allocation_hash = _hash_subject(
subject_id,
@@ -249,31 +441,31 @@ def decide_for_flag(
"allocation",
)
if allocation_hash >= experiment.traffic_allocation:
DECIDE_REQUESTS.labels(reason="outside_traffic_allocation").inc()
result = {
"flag": flag_key,
"value": flag.default_value,
"decision_id": str(uuid.uuid4()),
"experiment_id": str(experiment.pk),
"variant_id": None,
"reason": "outside_traffic_allocation",
}
_persist_decision(result, subject_id)
return result
result = _build_result(
flag_key=flag_key,
value=flag.default_value,
reason="outside_traffic_allocation",
experiment_id=str(experiment.pk),
)
return _finalize_result(
result=result,
subject_id=subject_id,
cache_key=cache_key,
)
variants = list(experiment.variants.all())
if not variants:
DECIDE_REQUESTS.labels(reason="no_variants").inc()
result = {
"flag": flag_key,
"value": flag.default_value,
"decision_id": str(uuid.uuid4()),
"experiment_id": str(experiment.pk),
"variant_id": None,
"reason": "no_variants",
}
_persist_decision(result, subject_id)
return result
result = _build_result(
flag_key=flag_key,
value=flag.default_value,
reason="no_variants",
experiment_id=str(experiment.pk),
)
return _finalize_result(
result=result,
subject_id=subject_id,
cache_key=cache_key,
)
variant_hash = _hash_subject(
subject_id,
@@ -284,14 +476,15 @@ def decide_for_flag(
normalized_hash = variant_hash * total_weight / Decimal(100)
selected = _select_variant(variants, normalized_hash)
DECIDE_REQUESTS.labels(reason="experiment_assigned").inc()
result = {
"flag": flag_key,
"value": selected.value if selected else flag.default_value,
"decision_id": str(uuid.uuid4()),
"experiment_id": str(experiment.pk),
"variant_id": str(selected.pk) if selected else None,
"reason": "experiment_assigned",
}
_persist_decision(result, subject_id)
return result
result = _build_result(
flag_key=flag_key,
value=selected.value if selected else flag.default_value,
reason="experiment_assigned",
experiment_id=str(experiment.pk),
variant_id=str(selected.pk) if selected else None,
)
return _finalize_result(
result=result,
subject_id=subject_id,
cache_key=cache_key,
)
+149 -3
View File
@@ -1,8 +1,9 @@
from decimal import Decimal
from typing import override
from unittest.mock import patch
from django.core.cache import cache
from django.test import TestCase
from django.test import TestCase, override_settings
from apps.decision.services import (
MAX_CONCURRENT_EXPERIMENTS,
@@ -10,6 +11,7 @@ from apps.decision.services import (
_select_variant,
decide_for_flag,
)
from apps.events.models import Decision
from apps.experiments.models import Experiment, ExperimentStatus
from apps.experiments.services import variant_create
from apps.experiments.tests.helpers import make_experiment, make_flag
@@ -37,11 +39,12 @@ class HashSubjectTest(TestCase):
class SelectVariantTest(TestCase):
def test_selects_by_weight(self) -> None:
class FV:
def __init__(self, n, w):
def __init__(self, n, w, i):
self.name = n
self.weight = Decimal(str(w))
self.id = i
variants = [FV("a", 50), FV("b", 50)]
variants = [FV("a", 50, "uuid-a"), FV("b", 50, "uuid-b")]
self.assertEqual(_select_variant(variants, Decimal(10)).name, "a")
self.assertEqual(_select_variant(variants, Decimal(60)).name, "b")
@@ -370,3 +373,146 @@ class PartialTrafficVariantDistributionTest(TestCase):
ratio = counts["ctrl"] / total
self.assertGreater(ratio, 0.3)
self.assertLess(ratio, 0.7)
class DecisionCachingTest(TestCase):
@override
def setUp(self) -> None:
cache.clear()
self.owner = make_user(
username="cache_owner",
email="cache_owner@lotty.local",
)
self.flag = make_flag(suffix="_cache", default="default_val")
self.experiment = make_experiment(
flag=self.flag,
owner=self.owner,
suffix="_cache",
traffic_allocation=Decimal("100.00"),
)
variant_create(
experiment=self.experiment,
user=self.owner,
name="control",
value="ctrl",
weight=Decimal("50.00"),
is_control=True,
)
variant_create(
experiment=self.experiment,
user=self.owner,
name="treatment",
value="treat",
weight=Decimal("50.00"),
)
Experiment.objects.filter(pk=self.experiment.pk).update(
status=ExperimentStatus.RUNNING,
)
def test_cached_result_skips_recalculation(self) -> None:
first = decide_for_flag(
self.flag.key,
"cached_subject",
{"country": "US"},
)
with patch(
"apps.decision.services._check_targeting",
side_effect=AssertionError("cache miss"),
):
second = decide_for_flag(
self.flag.key,
"cached_subject",
{"country": "US"},
)
self.assertEqual(first["reason"], "experiment_assigned")
self.assertEqual(second["reason"], "experiment_assigned")
self.assertEqual(first["value"], second["value"])
self.assertNotEqual(first["decision_id"], second["decision_id"])
class DecisionPersistenceModeTest(TestCase):
@override
def setUp(self) -> None:
cache.clear()
self.owner = make_user(
username="persist_owner",
email="persist_owner@lotty.local",
)
self.flag = make_flag(suffix="_persist", default="default_val")
def _make_running_experiment(self):
experiment = make_experiment(
flag=self.flag,
owner=self.owner,
suffix="_persist",
traffic_allocation=Decimal("100.00"),
)
variant_create(
experiment=experiment,
user=self.owner,
name="control",
value="ctrl",
weight=Decimal("50.00"),
is_control=True,
)
variant_create(
experiment=experiment,
user=self.owner,
name="treatment",
value="treat",
weight=Decimal("50.00"),
)
Experiment.objects.filter(pk=experiment.pk).update(
status=ExperimentStatus.RUNNING,
)
return experiment
@override_settings(DECISION_WRITE_MODE="async")
def test_async_mode_enqueues_non_assigned_result(self) -> None:
with patch(
"apps.decision.services.persist_decision_task.delay"
) as delay_mock, patch(
"apps.decision.services.decision_create"
) as create_mock:
result = decide_for_flag(self.flag.key, "subj_async", {})
self.assertEqual(result["reason"], "no_active_experiment")
delay_mock.assert_called_once()
create_mock.assert_not_called()
@override_settings(DECISION_WRITE_MODE="async")
def test_async_mode_keeps_assigned_result_sync(self) -> None:
self._make_running_experiment()
with patch(
"apps.decision.services.persist_decision_task.delay"
) as delay_mock, patch(
"apps.decision.services.decision_create"
) as create_mock:
result = decide_for_flag(
self.flag.key,
"subj_async_assigned",
{},
)
self.assertEqual(result["reason"], "experiment_assigned")
delay_mock.assert_not_called()
create_mock.assert_called_once()
@override_settings(DECISION_WRITE_MODE="disabled")
def test_disabled_mode_skips_non_assigned_persist(self) -> None:
before = Decision.objects.count()
result = decide_for_flag(self.flag.key, "subj_disabled", {})
after = Decision.objects.count()
self.assertEqual(result["reason"], "no_active_experiment")
self.assertEqual(before, after)
@override_settings(DECISION_WRITE_MODE="disabled")
def test_disabled_mode_keeps_assigned_persist(self) -> None:
self._make_running_experiment()
before = Decision.objects.count()
result = decide_for_flag(
self.flag.key,
"subj_disabled_assigned",
{},
)
after = Decision.objects.count()
self.assertEqual(result["reason"], "experiment_assigned")
self.assertEqual(after, before + 1)
+37 -1
View File
@@ -1,7 +1,11 @@
import logging
from apps.events.services import cleanup_expired_pending_events
from apps.events.services import (
cleanup_expired_pending_events,
decision_create,
)
from config.celery import app
from config.errors import ConflictError
logger = logging.getLogger("lotty")
@@ -14,3 +18,35 @@ def cleanup_expired_pending_events_task(self):
extra={"deleted": deleted},
)
return deleted
@app.task(bind=True, name="events.persist_decision")
def persist_decision_task(
self,
*,
decision_id: str,
flag_key: str,
subject_id: str,
experiment_id: str | None = None,
variant_id: str | None = None,
value: str,
reason: str,
):
try:
decision_create(
decision_id=decision_id,
flag_key=flag_key,
subject_id=subject_id,
experiment_id=experiment_id,
variant_id=variant_id,
value=value,
reason=reason,
)
except ConflictError:
logger.info(
"decision_persist_skipped_conflict",
extra={"decision_id": decision_id},
)
return {"status": "conflict", "decision_id": decision_id}
return {"status": "created", "decision_id": decision_id}
@@ -589,9 +589,7 @@ class GuardrailHigherIsBetterTest(TestCase):
self.v_treatment,
)
for i in range(3):
self._send_purchase(
f"pur_hib_ok_{i}", f"dec_hib_ok_{i}", f"u{i}"
)
self._send_purchase(f"pur_hib_ok_{i}", f"dec_hib_ok_{i}", f"u{i}")
triggers = check_experiment_guardrails(self.experiment)
@@ -1,4 +1,3 @@
from django.core.exceptions import ValidationError
from django.test import TestCase
+10
View File
@@ -54,6 +54,16 @@ else:
},
}
DECISION_RESULT_CACHE_TTL_SECONDS = env.int(
"DECISION_RESULT_CACHE_TTL_SECONDS",
default=60,
)
DECISION_WRITE_MODE = env(
"DECISION_WRITE_MODE",
default="sync",
)
# Celery