class Migration(migrations.Migration):
    """Initial migration for the metrics app.

    Creates the ``MetricDefinition`` and ``ExperimentMetric`` models.
    """

    initial = True

    # ExperimentMetric.experiment is a foreign key to experiments.Experiment,
    # so that app's initial migration has to be applied first.
    dependencies = [
        ('experiments', '0001_initial'),
    ]

    operations = [
        # Metric definitions, addressed by a unique, regex-validated ``key``.
        migrations.CreateModel(
            name='MetricDefinition',
            fields=[
                ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
                ('key', models.CharField(max_length=100, unique=True, validators=[django.core.validators.RegexValidator(message='Metric key must start with a lowercase letter and contain only lowercase letters, digits, and underscores.', regex='^[a-z][a-z0-9_]*$')], verbose_name='key')),
                ('name', models.CharField(max_length=200, verbose_name='name')),
                ('description', models.TextField(blank=True, verbose_name='description')),
                ('metric_type', models.CharField(choices=[('ratio', 'Ratio'), ('count', 'Count'), ('average', 'Average'), ('percentile', 'Percentile')], max_length=20, verbose_name='metric type')),
                ('direction', models.CharField(choices=[('higher_is_better', 'Higher is better'), ('lower_is_better', 'Lower is better'), ('neutral', 'Neutral')], default='neutral', max_length=20, verbose_name='direction')),
                ('calculation_rule', models.JSONField(verbose_name='calculation rule')),
                ('is_active', models.BooleanField(db_index=True, default=True, verbose_name='is active')),
                ('created_at', models.DateTimeField(auto_now_add=True, verbose_name='created at')),
                ('updated_at', models.DateTimeField(auto_now=True, verbose_name='updated at')),
            ],
            options={
                'verbose_name': 'metric definition',
                'verbose_name_plural': 'metric definitions',
                'ordering': ['key'],
            },
        ),
        # Link rows attaching a metric definition to an experiment.
        migrations.CreateModel(
            name='ExperimentMetric',
            fields=[
                ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
                ('is_primary', models.BooleanField(default=False, verbose_name='is primary metric')),
                ('created_at', models.DateTimeField(auto_now_add=True, verbose_name='created at')),
                # CASCADE: link rows are removed together with their experiment.
                ('experiment', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='experiment_metrics', to='experiments.experiment', verbose_name='experiment')),
                # PROTECT: a metric definition that is still referenced cannot be deleted.
                ('metric', models.ForeignKey(on_delete=django.db.models.deletion.PROTECT, related_name='experiment_usages', to='metrics.metricdefinition', verbose_name='metric')),
            ],
            options={
                'verbose_name': 'experiment metric',
                'verbose_name_plural': 'experiment metrics',
                'indexes': [models.Index(fields=['experiment', 'is_primary'], name='idx_exp_metric_primary')],
                # A given metric can be attached to a given experiment at most once.
                'constraints': [models.UniqueConstraint(fields=('experiment', 'metric'), name='unique_experiment_metric')],
            },
        ),
    ]
# Metric keys: a lowercase letter followed by lowercase letters, digits or
# underscores.
# NOTE(review): ``$`` also matches just before a trailing newline, so a key
# such as "clicks\n" would pass; ``\Z`` would be stricter, but changing the
# pattern alters the validator recorded in migration 0001 — decide separately.
METRIC_KEY_PATTERN = r"^[a-z][a-z0-9_]*$"


class MetricType(models.TextChoices):
    """Supported kinds of metric."""

    RATIO = "ratio", _("Ratio")
    COUNT = "count", _("Count")
    AVERAGE = "average", _("Average")
    PERCENTILE = "percentile", _("Percentile")


class MetricDirection(models.TextChoices):
    """Which direction of change counts as an improvement for a metric."""

    HIGHER_IS_BETTER = "higher_is_better", _("Higher is better")
    LOWER_IS_BETTER = "lower_is_better", _("Lower is better")
    NEUTRAL = "neutral", _("Neutral")


class MetricDefinition(BaseModel):
    """A metric that can be attached to experiments, identified by ``key``."""

    # Unique identifier, validated against METRIC_KEY_PATTERN.
    key = models.CharField(
        max_length=100,
        unique=True,
        verbose_name=_("key"),
        validators=[
            django.core.validators.RegexValidator(
                regex=METRIC_KEY_PATTERN,
                # Wrapped in gettext_lazy like every other user-facing string
                # in this model, so the error can be translated.
                message=_(
                    "Metric key must start with a lowercase letter "
                    "and contain only lowercase letters, digits, "
                    "and underscores."
                ),
            )
        ],
    )
    # Human-readable display name.
    name = models.CharField(
        max_length=200,
        verbose_name=_("name"),
    )
    # Optional free-form description.
    description = models.TextField(
        blank=True,
        verbose_name=_("description"),
    )
    # One of the MetricType values.
    metric_type = models.CharField(
        max_length=20,
        choices=MetricType.choices,
        verbose_name=_("metric type"),
    )
    # One of the MetricDirection values; defaults to neutral.
    direction = models.CharField(
        max_length=20,
        choices=MetricDirection.choices,
        default=MetricDirection.NEUTRAL,
        verbose_name=_("direction"),
    )
    # JSON document describing how the metric is computed; required.
    calculation_rule = models.JSONField(
        verbose_name=_("calculation rule"),
    )
    # Indexed flag so active/inactive filtering is cheap.
    is_active = models.BooleanField(
        default=True,
        db_index=True,
        verbose_name=_("is active"),
    )
    # Set once, when the row is first saved.
    created_at = models.DateTimeField(
        auto_now_add=True,
        verbose_name=_("created at"),
    )
    # Refreshed on every save().
    updated_at = models.DateTimeField(
        auto_now=True,
        verbose_name=_("updated at"),
    )

    class Meta:
        verbose_name = _("metric definition")
        verbose_name_plural = _("metric definitions")
        ordering = ["key"]

    @override
    def __str__(self) -> str:
        """Return ``"<key> (<metric_type>)"``."""
        return f"{self.key} ({self.metric_type})"
# Every key a calculation rule may contain, per metric type.
VALID_RULE_FIELDS: dict[str, set[str]] = {
    MetricType.RATIO: {"type", "numerator_event", "denominator_event"},
    MetricType.COUNT: {"type", "event"},
    MetricType.AVERAGE: {"type", "event", "property"},
    MetricType.PERCENTILE: {"type", "event", "property", "percentile"},
}

# Keys a calculation rule must contain, per metric type.
REQUIRED_RULE_FIELDS: dict[str, set[str]] = {
    MetricType.RATIO: {"numerator_event", "denominator_event"},
    MetricType.COUNT: {"event"},
    MetricType.AVERAGE: {"event", "property"},
    MetricType.PERCENTILE: {"event", "property"},
}


def _validate_calculation_rule(
    metric_type: str,
    rule: dict[str, Any],
) -> None:
    """Check that *rule* has the shape expected for *metric_type*.

    Args:
        metric_type: A ``MetricType`` value (or its plain string form).
        rule: The calculation rule to validate.

    Raises:
        ValidationError: Keyed on ``calculation_rule`` when *rule* is not a
            mapping, lacks a required field, or contains a field that is not
            listed in ``VALID_RULE_FIELDS`` for the metric type.
    """
    # A JSONField accepts lists and scalars too; fail with a validation
    # error rather than an AttributeError on ``.keys()``.
    if not isinstance(rule, dict):
        raise ValidationError(
            {"calculation_rule": "Calculation rule must be a JSON object."}
        )

    present = set(rule)

    missing = REQUIRED_RULE_FIELDS.get(metric_type, set()) - present
    if missing:
        raise ValidationError(
            {
                "calculation_rule": (
                    f"Missing required fields for '{metric_type}': "
                    f"{', '.join(sorted(missing))}."
                )
            }
        )

    allowed = VALID_RULE_FIELDS.get(metric_type)
    if allowed is None:
        # Metric type not in the table: there is no field list to check
        # against, so nothing more is enforced here (as before).
        return

    unexpected = present - allowed
    if unexpected:
        raise ValidationError(
            {
                "calculation_rule": (
                    f"Unknown fields for '{metric_type}': "
                    f"{', '.join(sorted(map(str, unexpected)))}."
                )
            }
        )


@transaction.atomic
def metric_definition_create(
    *,
    key: str,
    name: str,
    metric_type: str,
    calculation_rule: dict[str, Any],
    description: str = "",
    direction: str = "neutral",
) -> MetricDefinition:
    """Create and persist a metric definition.

    The calculation rule is validated against the metric type before the row
    is saved.

    Raises:
        ValidationError: If the calculation rule is malformed.

    NOTE(review): the tests in this change also expect a ValidationError for a
    malformed key and a ConflictError for a duplicate key; neither is raised
    here, so presumably ``BaseModel.save()`` handles both — confirm.
    """
    _validate_calculation_rule(metric_type, calculation_rule)
    metric = MetricDefinition(
        key=key,
        name=name,
        description=description,
        metric_type=metric_type,
        direction=direction,
        calculation_rule=calculation_rule,
    )
    metric.save()
    return metric


def metric_definition_update(
    *,
    metric: MetricDefinition,
    **fields: Any,
) -> MetricDefinition:
    """Update the mutable attributes of *metric* and save it.

    Only ``name``, ``description``, ``direction`` and ``is_active`` may be
    changed. A value of ``None`` means "leave this attribute untouched".

    Raises:
        ValidationError: Keyed on the offending name when any other field is
            passed; nothing is modified in that case.
    """
    allowed = {"name", "description", "direction", "is_active"}
    # Reject the whole call before touching the instance, so a bad field
    # never leaves it half-updated.
    for key in fields:
        if key not in allowed:
            raise ValidationError({key: f"Field '{key}' cannot be updated."})
    for key, value in fields.items():
        if value is not None:
            setattr(metric, key, value)
    metric.save()
    return metric


def metric_definition_list(
    *,
    is_active: bool | None = None,
) -> QuerySet[MetricDefinition]:
    """Return metric definitions, optionally filtered by ``is_active``.

    Passing ``None`` (the default) applies no filter.
    """
    qs = MetricDefinition.objects.all()
    if is_active is not None:
        qs = qs.filter(is_active=is_active)
    return qs


def metric_definition_get(metric_id: UUID) -> MetricDefinition | None:
    """Return the metric definition with primary key *metric_id*, or ``None``."""
    return MetricDefinition.objects.filter(pk=metric_id).first()


@transaction.atomic
def experiment_metric_add(
    *,
    experiment: Any,
    metric: MetricDefinition,
    is_primary: bool = False,
) -> ExperimentMetric:
    """Attach *metric* to *experiment*.

    When ``is_primary`` is true, every metric currently flagged as primary on
    the experiment is demoted first. Both steps run in one transaction, so
    the demotion is rolled back if saving the new link fails.
    """
    if is_primary:
        experiment.experiment_metrics.filter(is_primary=True).update(
            is_primary=False,
        )
    em = ExperimentMetric(
        experiment=experiment,
        metric=metric,
        is_primary=is_primary,
    )
    em.save()
    return em


@transaction.atomic
def experiment_metric_remove(
    *,
    experiment: Any,
    metric: MetricDefinition,
) -> None:
    """Detach *metric* from *experiment*.

    Raises:
        ValidationError: Keyed on ``metric`` when no such link exists.
    """
    deleted, _ = ExperimentMetric.objects.filter(
        experiment=experiment,
        metric=metric,
    ).delete()
    if deleted == 0:
        raise ValidationError(
            {"metric": "This metric is not attached to the experiment."}
        )


def experiment_metric_list(
    experiment: Any,
) -> QuerySet[ExperimentMetric]:
    """Return the metrics attached to *experiment*.

    Primary metrics come first, then the rest ordered by metric key; the
    related metric definition is fetched in the same query.
    """
    return (
        ExperimentMetric.objects.filter(experiment=experiment)
        .select_related("metric")
        .order_by("-is_primary", "metric__key")
    )
calculation_rule={ + "type": "ratio", + "numerator_event": "button_clicked", + "denominator_event": "exposure", + }, + ) + self.assertEqual(metric.key, "click_rate") + self.assertEqual(metric.metric_type, MetricType.RATIO) + self.assertEqual(metric.direction, MetricDirection.HIGHER_IS_BETTER) + self.assertTrue(metric.is_active) + + def test_create_count_metric(self) -> None: + metric = metric_definition_create( + key="purchase_count", + name="Purchase Count", + metric_type=MetricType.COUNT, + calculation_rule={"type": "count", "event": "purchase"}, + ) + self.assertEqual(metric.metric_type, MetricType.COUNT) + + def test_create_average_metric(self) -> None: + metric = metric_definition_create( + key="avg_latency", + name="Average Latency", + metric_type=MetricType.AVERAGE, + direction=MetricDirection.LOWER_IS_BETTER, + calculation_rule={ + "type": "average", + "event": "page_loaded", + "property": "latency_ms", + }, + ) + self.assertEqual(metric.metric_type, MetricType.AVERAGE) + + def test_create_percentile_metric(self) -> None: + metric = metric_definition_create( + key="p95_latency", + name="P95 Latency", + metric_type=MetricType.PERCENTILE, + direction=MetricDirection.LOWER_IS_BETTER, + calculation_rule={ + "type": "percentile", + "event": "page_loaded", + "property": "latency_ms", + "percentile": 95, + }, + ) + self.assertEqual(metric.metric_type, MetricType.PERCENTILE) + + def test_reject_duplicate_key(self) -> None: + metric_definition_create( + key="dup_metric", + name="First", + metric_type=MetricType.COUNT, + calculation_rule={"type": "count", "event": "click"}, + ) + with self.assertRaises(ConflictError): + metric_definition_create( + key="dup_metric", + name="Second", + metric_type=MetricType.COUNT, + calculation_rule={"type": "count", "event": "click"}, + ) + + def test_reject_missing_required_rule_fields(self) -> None: + with self.assertRaises(ValidationError): + metric_definition_create( + key="bad_metric", + name="Bad", + 
class MetricDefinitionUpdateTest(TestCase):
    """Tests for metric_definition_update: editable fields and the immutable key."""

    @staticmethod
    def _count_metric(key: str, name: str):
        """Create the simple count metric every test in this class starts from."""
        return metric_definition_create(
            key=key,
            name=name,
            metric_type=MetricType.COUNT,
            calculation_rule={"type": "count", "event": "click"},
        )

    def test_update_name_and_description(self) -> None:
        original = self._count_metric("upd_metric", "Old Name")
        result = metric_definition_update(
            metric=original,
            name="New Name",
            description="Desc",
        )
        self.assertEqual(result.name, "New Name")
        self.assertEqual(result.description, "Desc")

    def test_deactivate_metric(self) -> None:
        original = self._count_metric("deact_metric", "Metric")
        result = metric_definition_update(metric=original, is_active=False)
        self.assertFalse(result.is_active)

    def test_reject_update_key(self) -> None:
        original = self._count_metric("immut_metric", "Metric")
        # ``key`` is not among the updatable fields, so the call must fail.
        with self.assertRaises(ValidationError):
            metric_definition_update(metric=original, key="new_key")
class GuardrailServiceTest(TestCase):
    """Tests for the guardrail services: create, list, update and delete.

    Updates and deletes are expected to succeed while the experiment is a
    draft and to be rejected once it has been started.
    """

    def setUp(self) -> None:
        self.experiment = make_experiment(suffix="_gr")
        self.metric = metric_definition_create(
            key="gr_error_rate",
            name="Error Rate",
            metric_type=MetricType.RATIO,
            direction=MetricDirection.LOWER_IS_BETTER,
            calculation_rule={
                "type": "ratio",
                "numerator_event": "error",
                "denominator_event": "exposure",
            },
        )

    def _make_guardrail(self, **extra):
        """Create a guardrail on the fixture experiment/metric at threshold 0.05."""
        return guardrail_create(
            experiment=self.experiment,
            metric=self.metric,
            threshold=Decimal("0.05"),
            **extra,
        )

    def _start_experiment(self, approver_suffix: str) -> None:
        """Take the fixture experiment through review and approval, then start it."""
        review_settings_update(
            default_min_approvals=1, allow_any_approver=True
        )
        approver = make_approver(approver_suffix)
        add_two_variants(self.experiment)
        in_review = experiment_submit_for_review(
            experiment=self.experiment,
            user=self.experiment.owner,
        )
        approved = experiment_approve(experiment=in_review, approver=approver)
        experiment_start(experiment=approved, user=self.experiment.owner)

    def test_create_guardrail(self) -> None:
        created = self._make_guardrail(
            observation_window_minutes=30,
            action=GuardrailAction.PAUSE,
        )
        self.assertEqual(created.threshold, Decimal("0.05"))
        self.assertEqual(created.action, GuardrailAction.PAUSE)

    def test_list_guardrails(self) -> None:
        self._make_guardrail()
        listed = guardrail_list(self.experiment)
        self.assertEqual(listed.count(), 1)

    def test_update_guardrail_in_draft(self) -> None:
        created = self._make_guardrail()
        changed = guardrail_update(
            guardrail=created,
            threshold=Decimal("0.10"),
        )
        self.assertEqual(changed.threshold, Decimal("0.10"))

    def test_reject_update_after_start(self) -> None:
        self._start_experiment("_gu")
        created = self._make_guardrail()
        with self.assertRaises(ValidationError):
            guardrail_update(guardrail=created, threshold=Decimal("0.10"))

    def test_delete_guardrail_in_draft(self) -> None:
        created = self._make_guardrail()
        guardrail_delete(guardrail=created)
        self.assertEqual(Guardrail.objects.count(), 0)

    def test_reject_delete_after_start(self) -> None:
        self._start_experiment("_gd")
        created = self._make_guardrail()
        with self.assertRaises(ValidationError):
            guardrail_delete(guardrail=created)