diff --git a/src/backend/apps/metrics/__init__.py b/src/backend/apps/metrics/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/backend/apps/metrics/apps.py b/src/backend/apps/metrics/apps.py
new file mode 100644
index 0000000..8b002c4
--- /dev/null
+++ b/src/backend/apps/metrics/apps.py
@@ -0,0 +1,5 @@
+from django.apps import AppConfig
+
+
+class MetricsConfig(AppConfig):  # Django AppConfig for the metrics app
+    name = "apps.metrics"  # full Python path to the application package
diff --git a/src/backend/apps/metrics/migrations/0001_initial.py b/src/backend/apps/metrics/migrations/0001_initial.py
new file mode 100644
index 0000000..b2fe876
--- /dev/null
+++ b/src/backend/apps/metrics/migrations/0001_initial.py
@@ -0,0 +1,54 @@
+# Generated by Django 5.2.11 on 2026-02-14 09:55
+
+import django.core.validators
+import django.db.models.deletion
+import uuid
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Initial schema: MetricDefinition and ExperimentMetric tables."""
+    initial = True
+
+    dependencies = [
+        ('experiments', '0001_initial'),  # ExperimentMetric.experiment FK target
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='MetricDefinition',
+            fields=[
+                ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
+                ('key', models.CharField(max_length=100, unique=True, validators=[django.core.validators.RegexValidator(message='Metric key must start with a lowercase letter and contain only lowercase letters, digits, and underscores.', regex='^[a-z][a-z0-9_]*$')], verbose_name='key')),
+                ('name', models.CharField(max_length=200, verbose_name='name')),
+                ('description', models.TextField(blank=True, verbose_name='description')),
+                ('metric_type', models.CharField(choices=[('ratio', 'Ratio'), ('count', 'Count'), ('average', 'Average'), ('percentile', 'Percentile')], max_length=20, verbose_name='metric type')),
+                ('direction', models.CharField(choices=[('higher_is_better', 'Higher is better'), ('lower_is_better', 'Lower is better'), ('neutral', 'Neutral')], default='neutral', max_length=20, verbose_name='direction')),
+                ('calculation_rule', models.JSONField(verbose_name='calculation rule')),
+                ('is_active', models.BooleanField(db_index=True, default=True, verbose_name='is active')),
+                ('created_at', models.DateTimeField(auto_now_add=True, verbose_name='created at')),
+                ('updated_at', models.DateTimeField(auto_now=True, verbose_name='updated at')),
+            ],
+            options={
+                'verbose_name': 'metric definition',
+                'verbose_name_plural': 'metric definitions',
+                'ordering': ['key'],
+            },
+        ),
+        migrations.CreateModel(
+            name='ExperimentMetric',
+            fields=[
+                ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
+                ('is_primary', models.BooleanField(default=False, verbose_name='is primary metric')),
+                ('created_at', models.DateTimeField(auto_now_add=True, verbose_name='created at')),
+                ('experiment', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='experiment_metrics', to='experiments.experiment', verbose_name='experiment')),
+                ('metric', models.ForeignKey(on_delete=django.db.models.deletion.PROTECT, related_name='experiment_usages', to='metrics.metricdefinition', verbose_name='metric')),
+            ],
+            options={
+                'verbose_name': 'experiment metric',
+                'verbose_name_plural': 'experiment metrics',
+                'indexes': [models.Index(fields=['experiment', 'is_primary'], name='idx_exp_metric_primary')],
+                'constraints': [models.UniqueConstraint(fields=('experiment', 'metric'), name='unique_experiment_metric')],
+            },
+        ),
+    ]
diff --git a/src/backend/apps/metrics/migrations/__init__.py b/src/backend/apps/metrics/migrations/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/backend/apps/metrics/models.py b/src/backend/apps/metrics/models.py
new file mode 100644
index 0000000..639a789
--- /dev/null
+++ b/src/backend/apps/metrics/models.py
@@ -0,0 +1,128 @@
+from typing import override
+
+import django.core.validators
+from django.db import models
+from django.utils.translation import gettext_lazy as _
+
+from apps.core.models import BaseModel
+
+METRIC_KEY_PATTERN = r"^[a-z][a-z0-9_]*$"  # a-z first, then a-z, 0-9 or _
+
+
+class MetricType(models.TextChoices):  # choices for the metric_type field
+    RATIO = "ratio", _("Ratio")
+    COUNT = "count", _("Count")
+    AVERAGE = "average", _("Average")
+    PERCENTILE = "percentile", _("Percentile")
+
+
+class MetricDirection(models.TextChoices):  # choices for the direction field
+    HIGHER_IS_BETTER = "higher_is_better", _("Higher is better")
+    LOWER_IS_BETTER = "lower_is_better", _("Lower is better")
+    NEUTRAL = "neutral", _("Neutral")
+
+
+class MetricDefinition(BaseModel):
+    """Reusable definition of a metric: identity, type and calculation rule.
+
+    ``key`` is unique and must match ``METRIC_KEY_PATTERN``. The JSON in
+    ``calculation_rule`` has no validator attached on this model.
+    """
+
+    # Identity
+    key = models.CharField(
+        verbose_name=_("key"),
+        unique=True,
+        max_length=100,
+        validators=[
+            django.core.validators.RegexValidator(
+                message=(
+                    "Metric key must start with a lowercase letter "
+                    "and contain only lowercase letters, digits, "
+                    "and underscores."
+                ),
+                regex=METRIC_KEY_PATTERN,
+            )
+        ],
+    )
+    name = models.CharField(verbose_name=_("name"), max_length=200)
+    description = models.TextField(verbose_name=_("description"), blank=True)
+
+    # Meaning
+    metric_type = models.CharField(
+        verbose_name=_("metric type"),
+        choices=MetricType.choices,
+        max_length=20,
+    )
+    direction = models.CharField(
+        verbose_name=_("direction"),
+        choices=MetricDirection.choices,
+        default=MetricDirection.NEUTRAL,
+        max_length=20,
+    )
+    calculation_rule = models.JSONField(verbose_name=_("calculation rule"))
+
+    # Lifecycle
+    is_active = models.BooleanField(
+        verbose_name=_("is active"), db_index=True, default=True
+    )
+    created_at = models.DateTimeField(
+        verbose_name=_("created at"), auto_now_add=True
+    )
+    updated_at = models.DateTimeField(
+        verbose_name=_("updated at"), auto_now=True
+    )
+
+    class Meta:
+        ordering = ["key"]
+        verbose_name = _("metric definition")
+        verbose_name_plural = _("metric definitions")
+
+    @override
+    def __str__(self) -> str:
+        """Return the key followed by the metric type in parentheses."""
+        return f"{self.key} ({self.metric_type})"
+
+
+class ExperimentMetric(BaseModel):
+    """Attachment of a MetricDefinition to an experiment (unique per pair)."""
+
+    experiment = models.ForeignKey(
+        "experiments.Experiment",
+        verbose_name=_("experiment"),
+        related_name="experiment_metrics",
+        on_delete=models.CASCADE,  # links are deleted with the experiment
+    )
+    metric = models.ForeignKey(
+        MetricDefinition,
+        verbose_name=_("metric"),
+        related_name="experiment_usages",
+        on_delete=models.PROTECT,  # a linked definition cannot be deleted
+    )
+    is_primary = models.BooleanField(
+        verbose_name=_("is primary metric"), default=False
+    )
+    created_at = models.DateTimeField(
+        verbose_name=_("created at"), auto_now_add=True
+    )
+
+    class Meta:
+        verbose_name = _("experiment metric")
+        verbose_name_plural = _("experiment metrics")
+        indexes = [
+            models.Index(
+                name="idx_exp_metric_primary",
+                fields=["experiment", "is_primary"],
+            ),
+        ]
+        constraints = [
+            models.UniqueConstraint(
+                name="unique_experiment_metric",
+                fields=["experiment", "metric"],
+            ),
+        ]
+
+    @override
+    def __str__(self) -> str:
+        label = f"{self.experiment.name} → {self.metric.key}"
+        return f"{label} [primary]" if self.is_primary else label
diff --git a/src/backend/apps/metrics/services.py b/src/backend/apps/metrics/services.py
new file mode 100644
index 0000000..c66a6ad
--- /dev/null
+++ b/src/backend/apps/metrics/services.py
@@ -0,0 +1,142 @@
+from typing import Any
+from uuid import UUID
+
+from django.core.exceptions import ValidationError
+from django.db import transaction
+from django.db.models import QuerySet
+
+from apps.metrics.models import (
+    ExperimentMetric,
+    MetricDefinition,
+    MetricType,
+)
+
+VALID_RULE_FIELDS: dict[str, set[str]] = {  # allowed keys per metric type
+    MetricType.RATIO: {"type", "numerator_event", "denominator_event"},
+    MetricType.COUNT: {"type", "event"},
+    MetricType.AVERAGE: {"type", "event", "property"},
+    MetricType.PERCENTILE: {"type", "event", "property", "percentile"},
+}
+
+REQUIRED_RULE_FIELDS: dict[str, set[str]] = {  # keys a rule must contain
+    MetricType.RATIO: {"numerator_event", "denominator_event"},
+    MetricType.COUNT: {"event"},
+    MetricType.AVERAGE: {"event", "property"},
+    MetricType.PERCENTILE: {"event", "property"},  # "percentile" is optional
+}
+
+
+def _validate_calculation_rule(metric_type: str, rule: dict[str, Any]) -> None:
+    """Check ``rule`` against the field tables for ``metric_type``.
+
+    Raises ValidationError (keyed ``calculation_rule``) for a missing required
+    field or a field not allowed for the type; unlisted types are not checked.
+    """
+    given = set(rule)
+    missing = REQUIRED_RULE_FIELDS.get(metric_type, set()) - given
+    # Keys outside the allow-list are most likely typos, so reject them too.
+    unknown = given - VALID_RULE_FIELDS.get(metric_type, given)
+    for label, names in (("Missing required", missing), ("Unknown", unknown)):
+        if names:
+            listed = ", ".join(sorted(names))
+            text = f"{label} fields for '{metric_type}': {listed}."
+            raise ValidationError({"calculation_rule": text})
+
+
+@transaction.atomic
+def metric_definition_create(
+    *,
+    key: str,
+    name: str,
+    metric_type: str,
+    calculation_rule: dict[str, Any],
+    description: str = "",
+    direction: str = "neutral",
+) -> MetricDefinition:
+    """Validate the rule, then save and return a new MetricDefinition."""
+    _validate_calculation_rule(metric_type, calculation_rule)
+    identity = {"key": key, "name": name, "description": description}
+    definition = MetricDefinition(
+        **identity,
+        metric_type=metric_type,
+        direction=direction,
+        calculation_rule=calculation_rule,
+    )
+    definition.save()
+    return definition
+
+
+def metric_definition_update(
+    *, metric: MetricDefinition, **fields: Any
+) -> MetricDefinition:
+    """Set allowed fields on ``metric`` (``None`` values skipped) and save."""
+    editable = {"name", "description", "direction", "is_active"}
+    # Reject the whole call up front so a bad keyword changes nothing.
+    bad = next((field for field in fields if field not in editable), None)
+    if bad is not None:
+        raise ValidationError({bad: f"Field '{bad}' cannot be updated."})
+    for attr, value in fields.items():
+        if value is not None:
+            setattr(metric, attr, value)
+    metric.save()
+    return metric
+
+
+def metric_definition_list(
+    *, is_active: bool | None = None
+) -> QuerySet[MetricDefinition]:
+    """Return all metric definitions, or only those matching ``is_active``."""
+    definitions = MetricDefinition.objects.all()
+    if is_active is None:
+        return definitions
+    return definitions.filter(is_active=is_active)
+
+
+def metric_definition_get(metric_id: UUID) -> MetricDefinition | None:
+    return MetricDefinition.objects.filter(pk=metric_id).first()  # or None
+
+
+@transaction.atomic
+def experiment_metric_add(
+    *,
+    experiment: Any,
+    metric: MetricDefinition,
+    is_primary: bool = False,
+) -> ExperimentMetric:
+    """Attach ``metric`` to ``experiment`` and return the new link row."""
+    if is_primary:
+        # Clear the flag on whichever link is currently primary; the atomic
+        # block undoes this if saving the new link fails.
+        flagged = experiment.experiment_metrics.filter(is_primary=True)
+        flagged.update(is_primary=False)
+    link = ExperimentMetric(
+        experiment=experiment, metric=metric, is_primary=is_primary
+    )
+    link.save()
+    return link
+
+
+@transaction.atomic
+def experiment_metric_remove(
+    *,
+    experiment: Any,
+    metric: MetricDefinition,
+) -> None:
+    """Detach ``metric`` from ``experiment``; raise if it was not attached."""
+    links = ExperimentMetric.objects.filter(
+        experiment=experiment, metric=metric
+    )
+    # delete() returns (total_deleted, per_model_counts); zero means no link.
+    if links.delete()[0] == 0:
+        reason = "This metric is not attached to the experiment."
+        raise ValidationError({"metric": reason})
+
+
+def experiment_metric_list(
+    experiment: Any,
+) -> QuerySet[ExperimentMetric]:
+    """Return the experiment's metric links: primary first, then by key."""
+    links = ExperimentMetric.objects.filter(experiment=experiment)
+    # Fetch each link's metric in the same query as the link itself.
+    links = links.select_related("metric")
+    return links.order_by("-is_primary", "metric__key")
diff --git a/src/backend/apps/metrics/tests/__init__.py b/src/backend/apps/metrics/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/backend/apps/metrics/tests/test_metrics.py b/src/backend/apps/metrics/tests/test_metrics.py
new file mode 100644
index 0000000..df92271
--- /dev/null
+++ b/src/backend/apps/metrics/tests/test_metrics.py
@@ -0,0 +1,345 @@
+from decimal import Decimal
+
+from django.core.exceptions import ValidationError
+from django.test import TestCase
+
+from apps.experiments.services import (
+    experiment_approve,
+    experiment_start,
+    experiment_submit_for_review,
+)
+from apps.experiments.tests.helpers import add_two_variants, make_experiment
+from apps.guardrails.models import Guardrail, GuardrailAction
+from apps.guardrails.services import (
+    guardrail_create,
+    guardrail_delete,
+    guardrail_list,
+    guardrail_update,
+)
+from apps.metrics.models import ExperimentMetric, MetricDirection, MetricType
+from apps.metrics.services import (
+    experiment_metric_add,
+    experiment_metric_list,
+    experiment_metric_remove,
+    metric_definition_create,
+    metric_definition_update,
+)
+from apps.reviews.services import review_settings_update
+from apps.reviews.tests.helpers import make_approver
+from config.errors import ConflictError
+
+
+class MetricDefinitionCreateTest(TestCase):  # metric_definition_create cases
+    def test_create_ratio_metric(self) -> None:
+        metric = metric_definition_create(
+            key="click_rate",
+            name="Click Rate",
+            metric_type=MetricType.RATIO,
+            direction=MetricDirection.HIGHER_IS_BETTER,
+            calculation_rule={
+                "type": "ratio",
+                "numerator_event": "button_clicked",
+                "denominator_event": "exposure",
+            },
+        )
+        self.assertEqual(metric.key, "click_rate")
+        self.assertEqual(metric.metric_type, MetricType.RATIO)
+        self.assertEqual(metric.direction, MetricDirection.HIGHER_IS_BETTER)
+        self.assertTrue(metric.is_active)  # is_active defaults to True
+
+    def test_create_count_metric(self) -> None:
+        metric = metric_definition_create(
+            key="purchase_count",
+            name="Purchase Count",
+            metric_type=MetricType.COUNT,
+            calculation_rule={"type": "count", "event": "purchase"},
+        )
+        self.assertEqual(metric.metric_type, MetricType.COUNT)
+
+    def test_create_average_metric(self) -> None:
+        metric = metric_definition_create(
+            key="avg_latency",
+            name="Average Latency",
+            metric_type=MetricType.AVERAGE,
+            direction=MetricDirection.LOWER_IS_BETTER,
+            calculation_rule={
+                "type": "average",
+                "event": "page_loaded",
+                "property": "latency_ms",
+            },
+        )
+        self.assertEqual(metric.metric_type, MetricType.AVERAGE)
+
+    def test_create_percentile_metric(self) -> None:
+        metric = metric_definition_create(
+            key="p95_latency",
+            name="P95 Latency",
+            metric_type=MetricType.PERCENTILE,
+            direction=MetricDirection.LOWER_IS_BETTER,
+            calculation_rule={
+                "type": "percentile",
+                "event": "page_loaded",
+                "property": "latency_ms",
+                "percentile": 95,
+            },
+        )
+        self.assertEqual(metric.metric_type, MetricType.PERCENTILE)
+
+    def test_reject_duplicate_key(self) -> None:
+        metric_definition_create(
+            key="dup_metric",
+            name="First",
+            metric_type=MetricType.COUNT,
+            calculation_rule={"type": "count", "event": "click"},
+        )
+        with self.assertRaises(ConflictError):  # key is unique
+            metric_definition_create(
+                key="dup_metric",
+                name="Second",
+                metric_type=MetricType.COUNT,
+                calculation_rule={"type": "count", "event": "click"},
+            )
+
+    def test_reject_missing_required_rule_fields(self) -> None:
+        with self.assertRaises(ValidationError):
+            metric_definition_create(
+                key="bad_metric",
+                name="Bad",
+                metric_type=MetricType.RATIO,  # also needs denominator_event
+                calculation_rule={"type": "ratio", "numerator_event": "click"},
+            )
+
+    def test_reject_invalid_key_pattern(self) -> None:
+        with self.assertRaises(ValidationError):  # assumes save() validates
+            metric_definition_create(
+                key="Invalid-KEY",  # uppercase and "-" fail the key pattern
+                name="Bad Key",
+                metric_type=MetricType.COUNT,
+                calculation_rule={"type": "count", "event": "x"},
+            )
+
+
+class MetricDefinitionUpdateTest(TestCase):  # metric_definition_update cases
+    def test_update_name_and_description(self) -> None:
+        metric = metric_definition_create(
+            key="upd_metric",
+            name="Old Name",
+            metric_type=MetricType.COUNT,
+            calculation_rule={"type": "count", "event": "click"},
+        )
+        updated = metric_definition_update(
+            metric=metric,
+            name="New Name",
+            description="Desc",
+        )
+        self.assertEqual(updated.name, "New Name")
+        self.assertEqual(updated.description, "Desc")
+
+    def test_deactivate_metric(self) -> None:
+        metric = metric_definition_create(
+            key="deact_metric",
+            name="Metric",
+            metric_type=MetricType.COUNT,
+            calculation_rule={"type": "count", "event": "click"},
+        )
+        updated = metric_definition_update(metric=metric, is_active=False)
+        self.assertFalse(updated.is_active)
+
+    def test_reject_update_key(self) -> None:
+        metric = metric_definition_create(
+            key="immut_metric",
+            name="Metric",
+            metric_type=MetricType.COUNT,
+            calculation_rule={"type": "count", "event": "click"},
+        )
+        with self.assertRaises(ValidationError):  # key is not updatable
+            metric_definition_update(metric=metric, key="new_key")
+
+
+class ExperimentMetricTest(TestCase):  # experiment_metric_* service cases
+    def setUp(self) -> None:  # one experiment plus one ratio metric
+        self.experiment = make_experiment(suffix="_em")
+        self.metric = metric_definition_create(
+            key="em_click_rate",
+            name="Click Rate",
+            metric_type=MetricType.RATIO,
+            calculation_rule={
+                "type": "ratio",
+                "numerator_event": "click",
+                "denominator_event": "exposure",
+            },
+        )
+
+    def test_add_metric_to_experiment(self) -> None:
+        em = experiment_metric_add(
+            experiment=self.experiment,
+            metric=self.metric,
+            is_primary=True,
+        )
+        self.assertTrue(em.is_primary)
+        self.assertEqual(em.experiment, self.experiment)
+        self.assertEqual(em.metric, self.metric)
+
+    def test_list_experiment_metrics(self) -> None:
+        experiment_metric_add(
+            experiment=self.experiment,
+            metric=self.metric,
+        )
+        ems = experiment_metric_list(self.experiment)
+        self.assertEqual(ems.count(), 1)
+
+    def test_remove_metric_from_experiment(self) -> None:
+        experiment_metric_add(
+            experiment=self.experiment,
+            metric=self.metric,
+        )
+        experiment_metric_remove(
+            experiment=self.experiment,
+            metric=self.metric,
+        )
+        self.assertEqual(
+            ExperimentMetric.objects.filter(
+                experiment=self.experiment,
+            ).count(),
+            0,
+        )
+
+    def test_reject_remove_nonexistent(self) -> None:
+        with self.assertRaises(ValidationError):
+            experiment_metric_remove(
+                experiment=self.experiment,
+                metric=self.metric,
+            )
+
+    def test_reject_duplicate_metric(self) -> None:
+        experiment_metric_add(
+            experiment=self.experiment,
+            metric=self.metric,
+        )
+        with self.assertRaises(ConflictError):  # same pair added twice
+            experiment_metric_add(
+                experiment=self.experiment,
+                metric=self.metric,
+            )
+
+    def test_primary_metric_switch(self) -> None:
+        m2 = metric_definition_create(
+            key="em_error_rate",
+            name="Error Rate",
+            metric_type=MetricType.RATIO,
+            calculation_rule={
+                "type": "ratio",
+                "numerator_event": "error",
+                "denominator_event": "exposure",
+            },
+        )
+        em1 = experiment_metric_add(
+            experiment=self.experiment,
+            metric=self.metric,
+            is_primary=True,
+        )
+        em2 = experiment_metric_add(
+            experiment=self.experiment,
+            metric=m2,
+            is_primary=True,
+        )
+        em1.refresh_from_db()  # reload: em2's add demoted it via update()
+        self.assertFalse(em1.is_primary)
+        self.assertTrue(em2.is_primary)
+
+
+class GuardrailServiceTest(TestCase):
+    """Guardrail services: create/list, and edits rejected after start."""
+
+    def setUp(self) -> None:
+        """Create the experiment and ratio metric shared by every test."""
+        self.experiment = make_experiment(suffix="_gr")
+        self.metric = metric_definition_create(
+            key="gr_error_rate",
+            name="Error Rate",
+            metric_type=MetricType.RATIO,
+            direction=MetricDirection.LOWER_IS_BETTER,
+            calculation_rule={
+                "type": "ratio",
+                "numerator_event": "error",
+                "denominator_event": "exposure",
+            },
+        )
+
+    def _start_experiment(self, approver_suffix: str) -> None:
+        """Submit, approve and start ``self.experiment``."""
+        # Relax review settings (one approval, any approver) first.
+        review_settings_update(
+            default_min_approvals=1, allow_any_approver=True
+        )
+        approver = make_approver(approver_suffix)
+        add_two_variants(self.experiment)
+        exp = experiment_submit_for_review(
+            experiment=self.experiment,
+            user=self.experiment.owner,
+        )
+        exp = experiment_approve(experiment=exp, approver=approver)
+        experiment_start(experiment=exp, user=self.experiment.owner)
+
+    def test_create_guardrail(self) -> None:
+        g = guardrail_create(
+            experiment=self.experiment,
+            metric=self.metric,
+            threshold=Decimal("0.05"),
+            observation_window_minutes=30,
+            action=GuardrailAction.PAUSE,
+        )
+        self.assertEqual(g.threshold, Decimal("0.05"))
+        self.assertEqual(g.action, GuardrailAction.PAUSE)
+
+    def test_list_guardrails(self) -> None:
+        guardrail_create(
+            experiment=self.experiment,
+            metric=self.metric,
+            threshold=Decimal("0.05"),
+        )
+        grs = guardrail_list(self.experiment)
+        self.assertEqual(grs.count(), 1)
+
+    def test_update_guardrail_in_draft(self) -> None:
+        g = guardrail_create(
+            experiment=self.experiment,
+            metric=self.metric,
+            threshold=Decimal("0.05"),
+        )
+        updated = guardrail_update(
+            guardrail=g,
+            threshold=Decimal("0.10"),
+        )
+        self.assertEqual(updated.threshold, Decimal("0.10"))
+
+    def test_reject_update_after_start(self) -> None:
+        self._start_experiment("_gu")
+        # Creation is expected to succeed here; only the update must fail.
+        g = guardrail_create(
+            experiment=self.experiment,
+            metric=self.metric,
+            threshold=Decimal("0.05"),
+        )
+        with self.assertRaises(ValidationError):
+            guardrail_update(guardrail=g, threshold=Decimal("0.10"))
+
+    def test_delete_guardrail_in_draft(self) -> None:
+        g = guardrail_create(
+            experiment=self.experiment,
+            metric=self.metric,
+            threshold=Decimal("0.05"),
+        )
+        guardrail_delete(guardrail=g)
+        self.assertEqual(Guardrail.objects.count(), 0)
+
+    def test_reject_delete_after_start(self) -> None:
+        self._start_experiment("_gd")
+        # Creation is expected to succeed here; only the delete must fail.
+        g = guardrail_create(
+            experiment=self.experiment,
+            metric=self.metric,
+            threshold=Decimal("0.05"),
+        )
+        with self.assertRaises(ValidationError):
+            guardrail_delete(guardrail=g)