feat(reports): added reports business logic
This commit is contained in:
@@ -0,0 +1,5 @@
|
||||
from django.apps import AppConfig
|
||||
|
||||
|
||||
class ReportsConfig(AppConfig):
    """Django application configuration for the reports app."""

    # Dotted Python path of the application package.
    name = "apps.reports"
|
||||
@@ -0,0 +1,299 @@
|
||||
import math
from datetime import datetime
from decimal import Decimal
from uuid import UUID

from apps.events.models import Event, Exposure
from apps.experiments.models import Experiment
from apps.metrics.models import (
    ExperimentMetric,
    MetricDefinition,
    MetricType,
)
|
||||
|
||||
|
||||
def _exposure_decision_ids(
    experiment_id: UUID,
    variant_id: UUID,
    start_date: datetime | None = None,
    end_date: datetime | None = None,
) -> list[str]:
    """Return the decision ids of the exposures recorded for one variant.

    Exposures are matched on experiment and variant. When given, the period
    bounds restrict them to ``start_date <= timestamp < end_date``.
    """
    criteria: dict[str, object] = {
        "experiment_id": experiment_id,
        "variant_id": variant_id,
    }
    if start_date:
        criteria["timestamp__gte"] = start_date
    if end_date:
        criteria["timestamp__lt"] = end_date

    exposures = Exposure.objects.filter(**criteria)
    return list(exposures.values_list("decision_id", flat=True))
|
||||
|
||||
|
||||
def _count_events(
    decision_ids: list[str],
    event_type_name: str,
    start_date: datetime | None = None,
    end_date: datetime | None = None,
) -> int:
    """Count attributed events of one type tied to the given decisions.

    Only events flagged ``is_attributed=True`` are counted. When given, the
    period bounds restrict them to ``start_date <= timestamp < end_date``.
    """
    criteria: dict[str, object] = {
        "decision_id__in": decision_ids,
        "event_type__name": event_type_name,
        "is_attributed": True,
    }
    if start_date:
        criteria["timestamp__gte"] = start_date
    if end_date:
        criteria["timestamp__lt"] = end_date

    return Event.objects.filter(**criteria).count()
|
||||
|
||||
|
||||
def _count_unique_subjects(
    decision_ids: list[str],
    event_type_name: str,
    start_date: datetime | None = None,
    end_date: datetime | None = None,
) -> int:
    """Count distinct subjects with attributed events of one type.

    Events are limited to the given decisions and to ``is_attributed=True``.
    When given, the period bounds restrict them to
    ``start_date <= timestamp < end_date``.
    """
    criteria: dict[str, object] = {
        "decision_id__in": decision_ids,
        "event_type__name": event_type_name,
        "is_attributed": True,
    }
    if start_date:
        criteria["timestamp__gte"] = start_date
    if end_date:
        criteria["timestamp__lt"] = end_date

    subjects = Event.objects.filter(**criteria).values("subject_id")
    return subjects.distinct().count()
|
||||
|
||||
|
||||
def _average_property(
    decision_ids: list[str],
    event_type_name: str,
    property_field: str,
    start_date: datetime | None = None,
    end_date: datetime | None = None,
) -> Decimal | None:
    """Average a numeric event property over attributed events.

    Events are limited to the given decisions, the named event type and
    ``is_attributed=True``. When given, the period bounds restrict them to
    ``start_date <= timestamp < end_date``.

    Events whose ``properties`` is not a dict, lacks ``property_field``, or
    holds a value that cannot be read as a finite number are skipped.

    Returns:
        The mean of the usable values, or ``None`` when there are none.
    """
    qs = Event.objects.filter(
        decision_id__in=decision_ids,
        event_type__name=event_type_name,
        is_attributed=True,
    )
    if start_date:
        qs = qs.filter(timestamp__gte=start_date)
    if end_date:
        qs = qs.filter(timestamp__lt=end_date)

    values = []
    for props in qs.values_list("properties", flat=True):
        if not isinstance(props, dict) or property_field not in props:
            continue
        try:
            number = float(props[property_field])
        except (TypeError, ValueError, OverflowError):
            # OverflowError: integers too large to be represented as a float.
            continue
        # float() accepts "nan"/"inf"; a single such value would turn the
        # whole average into NaN or Infinity, so treat it as unusable.
        if math.isfinite(number):
            values.append(number)

    if not values:
        return None
    return Decimal(str(sum(values) / len(values)))
|
||||
|
||||
|
||||
def _percentile_property(
    decision_ids: list[str],
    event_type_name: str,
    property_field: str,
    percentile: int,
    start_date: datetime | None = None,
    end_date: datetime | None = None,
) -> Decimal | None:
    """Return a percentile of a numeric event property over attributed events.

    Events are limited to the given decisions, the named event type and
    ``is_attributed=True``. When given, the period bounds restrict them to
    ``start_date <= timestamp < end_date``.

    Events whose ``properties`` is not a dict, lacks ``property_field``, or
    holds a value that cannot be read as a finite number are skipped.

    The result is the element at index ``int(n * percentile / 100)`` of the
    sorted values, clamped to the valid index range, so a percentile below 0
    yields the minimum and one of 100 or more yields the maximum.

    Returns:
        The selected value, or ``None`` when there are no usable values.
    """
    qs = Event.objects.filter(
        decision_id__in=decision_ids,
        event_type__name=event_type_name,
        is_attributed=True,
    )
    if start_date:
        qs = qs.filter(timestamp__gte=start_date)
    if end_date:
        qs = qs.filter(timestamp__lt=end_date)

    values = []
    for props in qs.values_list("properties", flat=True):
        if not isinstance(props, dict) or property_field not in props:
            continue
        try:
            number = float(props[property_field])
        except (TypeError, ValueError, OverflowError):
            # OverflowError: integers too large to be represented as a float.
            continue
        # NaN has no defined ordering and would make the sort below
        # meaningless; infinities are not real measurements either.
        if math.isfinite(number):
            values.append(number)

    if not values:
        return None

    values.sort()
    idx = int(len(values) * percentile / 100)
    # Clamp on both sides: without the lower bound a negative percentile
    # would index from the end of the list or raise IndexError.
    idx = max(0, min(idx, len(values) - 1))
    return Decimal(str(values[idx]))
|
||||
|
||||
|
||||
def calculate_metric_value(
    metric: MetricDefinition,
    experiment_id: UUID,
    variant_id: UUID,
    start_date: datetime | None = None,
    end_date: datetime | None = None,
) -> Decimal | None:
    """Compute one metric for one variant of an experiment.

    The metric type is taken from the ``"type"`` key of the metric's
    calculation rule, falling back to ``metric.metric_type``. Events are
    restricted to the decisions of the variant's exposures; the optional
    period bounds apply to both the exposures and the events.

    Returns:
        ``None`` when the variant has no exposures in the period or the
        metric type is not handled here. For ratio metrics, the event-count
        ratio rounded to six decimals, or ``Decimal(0)`` when the denominator
        count is zero. For count metrics, the event count. For average and
        percentile metrics, whatever the corresponding property helper
        returns (possibly ``None``).
    """
    rule = metric.calculation_rule
    period = (start_date, end_date)

    decision_ids = _exposure_decision_ids(experiment_id, variant_id, *period)
    if not decision_ids:
        return None

    metric_type = rule.get("type", metric.metric_type)

    if metric_type == MetricType.RATIO:
        hits = _count_events(decision_ids, rule["numerator_event"], *period)
        total = _count_events(decision_ids, rule["denominator_event"], *period)
        if total == 0:
            return Decimal(0)
        return Decimal(str(round(hits / total, 6)))
    elif metric_type == MetricType.COUNT:
        occurrences = _count_events(decision_ids, rule["event"], *period)
        return Decimal(str(occurrences))
    elif metric_type == MetricType.AVERAGE:
        return _average_property(
            decision_ids,
            rule["event"],
            rule["property"],
            *period,
        )
    elif metric_type == MetricType.PERCENTILE:
        return _percentile_property(
            decision_ids,
            rule["event"],
            rule["property"],
            rule.get("percentile", 95),
            *period,
        )

    # Unrecognized metric type: nothing to compute.
    return None
|
||||
|
||||
|
||||
def _exposure_count_for_variant(
    experiment_id: UUID,
    variant_id: UUID,
    start_date: datetime | None = None,
    end_date: datetime | None = None,
) -> int:
    """Count the exposures recorded for one variant of an experiment.

    When given, the period bounds restrict the exposures to
    ``start_date <= timestamp < end_date``.
    """
    criteria: dict[str, object] = {
        "experiment_id": experiment_id,
        "variant_id": variant_id,
    }
    if start_date:
        criteria["timestamp__gte"] = start_date
    if end_date:
        criteria["timestamp__lt"] = end_date

    return Exposure.objects.filter(**criteria).count()
|
||||
|
||||
|
||||
def _unique_subjects_for_variant(
    experiment_id: UUID,
    variant_id: UUID,
    start_date: datetime | None = None,
    end_date: datetime | None = None,
) -> int:
    """Count distinct subjects exposed to one variant of an experiment.

    When given, the period bounds restrict the exposures to
    ``start_date <= timestamp < end_date``.
    """
    criteria: dict[str, object] = {
        "experiment_id": experiment_id,
        "variant_id": variant_id,
    }
    if start_date:
        criteria["timestamp__gte"] = start_date
    if end_date:
        criteria["timestamp__lt"] = end_date

    subjects = Exposure.objects.filter(**criteria).values("subject_id")
    return subjects.distinct().count()
|
||||
|
||||
|
||||
def build_experiment_report(
    experiment: Experiment,
    start_date: datetime | None = None,
    end_date: datetime | None = None,
) -> dict:
    """Assemble a per-variant report for an experiment.

    For every variant of the experiment the report lists the exposure count,
    the number of distinct exposed subjects, and the value of each metric
    attached to the experiment (primary metrics first, then by metric key).
    The optional period bounds are forwarded to every calculation and echoed
    back as ISO-8601 strings under ``"period"`` (``None`` when not given).
    """
    experiment_metrics = (
        ExperimentMetric.objects.filter(experiment=experiment)
        .select_related("metric")
        .order_by("-is_primary", "metric__key")
    )

    variant_reports = []
    for variant in experiment.variants.all():
        metric_results = []
        for link in experiment_metrics:
            definition = link.metric
            metric_results.append(
                {
                    "metric_key": definition.key,
                    "metric_name": definition.name,
                    "metric_type": definition.metric_type,
                    "direction": definition.direction,
                    "is_primary": link.is_primary,
                    "value": calculate_metric_value(
                        metric=definition,
                        experiment_id=experiment.pk,
                        variant_id=variant.pk,
                        start_date=start_date,
                        end_date=end_date,
                    ),
                }
            )

        exposures = _exposure_count_for_variant(
            experiment.pk, variant.pk, start_date, end_date
        )
        unique_subjects = _unique_subjects_for_variant(
            experiment.pk, variant.pk, start_date, end_date
        )
        variant_reports.append(
            {
                "variant_id": variant.pk,
                "variant_name": variant.name,
                "is_control": variant.is_control,
                "weight": variant.weight,
                "exposures": exposures,
                "unique_subjects": unique_subjects,
                "metrics": metric_results,
            }
        )

    period_start = start_date.isoformat() if start_date else None
    period_end = end_date.isoformat() if end_date else None
    return {
        "experiment_id": experiment.pk,
        "experiment_name": experiment.name,
        "status": experiment.status,
        "period": {"start": period_start, "end": period_end},
        "variants": variant_reports,
    }
|
||||
@@ -0,0 +1,354 @@
|
||||
from datetime import timedelta
|
||||
from decimal import Decimal
|
||||
|
||||
from django.test import TestCase
|
||||
from django.utils import timezone
|
||||
|
||||
from apps.events.services import decision_create, process_events_batch
|
||||
from apps.events.tests.helpers import make_event_type, make_exposure_type
|
||||
from apps.experiments.tests.helpers import add_two_variants, make_experiment
|
||||
from apps.metrics.models import MetricDirection, MetricType
|
||||
from apps.metrics.services import (
|
||||
experiment_metric_add,
|
||||
metric_definition_create,
|
||||
)
|
||||
from apps.reports.services import (
|
||||
build_experiment_report,
|
||||
calculate_metric_value,
|
||||
)
|
||||
|
||||
|
||||
class CalculateMetricValueTest(TestCase):
    """Tests for ``calculate_metric_value`` on events ingested via services."""

    def setUp(self) -> None:
        # Event types referenced by the calculation rules in the tests.
        self.exposure_type = make_exposure_type()
        self.click_type = make_event_type(
            name="button_clicked",
            display_name="Button Clicked",
            requires_exposure=True,
        )
        self.error_type = make_event_type(
            name="error_occurred",
            display_name="Error",
            requires_exposure=False,
        )
        self.load_type = make_event_type(
            name="page_loaded",
            display_name="Page Loaded",
            requires_exposure=True,
        )

        self.experiment = make_experiment(suffix="_rpt")
        self.v_control, self.v_treatment = add_two_variants(self.experiment)
        self.now = timezone.now()

    def _create_decision_and_exposure(
        self,
        decision_id,
        subject_id,
        variant,
    ) -> None:
        # Record the decision, then ingest the matching exposure event.
        decision_create(
            decision_id=decision_id,
            flag_key="flag_rpt",
            subject_id=subject_id,
            experiment_id=str(self.experiment.pk),
            variant_id=str(variant.pk),
            value=variant.value,
            reason="experiment",
        )
        self._send_event(
            f"exp_{decision_id}",
            "exposure",
            decision_id,
            subject_id,
        )

    def _send_event(
        self, event_id, event_type, decision_id, subject_id, properties=None
    ) -> None:
        # Ingest a single event stamped with the test's reference time.
        payload = {
            "event_id": event_id,
            "event_type": event_type,
            "decision_id": decision_id,
            "subject_id": subject_id,
            "timestamp": self.now.isoformat(),
            "properties": properties or {},
        }
        process_events_batch([payload])

    def _metric_value(self, metric, variant, **period):
        # Shorthand for calculating ``metric`` on this test's experiment.
        return calculate_metric_value(
            metric=metric,
            experiment_id=self.experiment.pk,
            variant_id=variant.pk,
            **period,
        )

    def test_ratio_metric(self) -> None:
        # Two exposures, one click -> ratio of 0.5.
        rule = {
            "type": "ratio",
            "numerator_event": "button_clicked",
            "denominator_event": "exposure",
        }
        metric = metric_definition_create(
            key="rpt_click_rate",
            name="Click Rate",
            metric_type=MetricType.RATIO,
            calculation_rule=rule,
        )
        for decision_id, subject_id in (("dec_r1", "u1"), ("dec_r2", "u2")):
            self._create_decision_and_exposure(
                decision_id,
                subject_id,
                self.v_treatment,
            )
        self._send_event("evt_c1", "button_clicked", "dec_r1", "u1")

        value = self._metric_value(metric, self.v_treatment)
        self.assertEqual(value, Decimal("0.5"))

    def test_count_metric(self) -> None:
        # Two clicks on the same decision are both counted.
        metric = metric_definition_create(
            key="rpt_click_count",
            name="Click Count",
            metric_type=MetricType.COUNT,
            calculation_rule={"type": "count", "event": "button_clicked"},
        )
        self._create_decision_and_exposure("dec_cnt1", "u1", self.v_control)
        for event_id in ("evt_cnt1", "evt_cnt2"):
            self._send_event(event_id, "button_clicked", "dec_cnt1", "u1")

        value = self._metric_value(metric, self.v_control)
        self.assertEqual(value, Decimal(2))

    def test_average_metric(self) -> None:
        # Latencies of 100 and 200 average to 150.
        rule = {
            "type": "average",
            "event": "page_loaded",
            "property": "latency_ms",
        }
        metric = metric_definition_create(
            key="rpt_avg_latency",
            name="Avg Latency",
            metric_type=MetricType.AVERAGE,
            calculation_rule=rule,
        )
        self._create_decision_and_exposure("dec_avg1", "u1", self.v_treatment)
        for event_id, latency in (("evt_avg1", 100), ("evt_avg2", 200)):
            self._send_event(
                event_id,
                "page_loaded",
                "dec_avg1",
                "u1",
                properties={"latency_ms": latency},
            )

        value = self._metric_value(metric, self.v_treatment)
        self.assertEqual(value, Decimal("150.0"))

    def test_no_data_returns_none(self) -> None:
        # Without any exposure there is nothing to compute.
        metric = metric_definition_create(
            key="rpt_empty",
            name="Empty",
            metric_type=MetricType.COUNT,
            calculation_rule={"type": "count", "event": "button_clicked"},
        )
        self.assertIsNone(self._metric_value(metric, self.v_control))

    def test_period_filter(self) -> None:
        # A period starting after the recorded data excludes all of it.
        metric = metric_definition_create(
            key="rpt_period",
            name="Period Test",
            metric_type=MetricType.COUNT,
            calculation_rule={"type": "count", "event": "button_clicked"},
        )
        self._create_decision_and_exposure("dec_pf1", "u1", self.v_control)
        self._send_event("evt_pf1", "button_clicked", "dec_pf1", "u1")

        value = self._metric_value(
            metric,
            self.v_control,
            start_date=self.now + timedelta(hours=1),
        )
        self.assertIsNone(value)
|
||||
|
||||
|
||||
class BuildExperimentReportTest(TestCase):
    """Tests for ``build_experiment_report`` on a two-variant experiment."""

    def setUp(self) -> None:
        self.exposure_type = make_exposure_type()
        self.click_type = make_event_type(
            name="build_clicked",
            display_name="Build Clicked",
            requires_exposure=True,
        )
        self.experiment = make_experiment(suffix="_bld")
        self.v_control, self.v_treatment = add_two_variants(self.experiment)

        # Primary ratio metric attached to the experiment for every test.
        click_rate_rule = {
            "type": "ratio",
            "numerator_event": "build_clicked",
            "denominator_event": "exposure",
        }
        self.metric = metric_definition_create(
            key="bld_click_rate",
            name="Click Rate",
            metric_type=MetricType.RATIO,
            direction=MetricDirection.HIGHER_IS_BETTER,
            calculation_rule=click_rate_rule,
        )
        experiment_metric_add(
            experiment=self.experiment,
            metric=self.metric,
            is_primary=True,
        )

        self.now = timezone.now()

    def _create_decision_and_exposure(
        self,
        decision_id,
        subject_id,
        variant,
    ) -> None:
        # Record the decision, then ingest the matching exposure event.
        decision_create(
            decision_id=decision_id,
            flag_key="flag_bld",
            subject_id=subject_id,
            experiment_id=str(self.experiment.pk),
            variant_id=str(variant.pk),
            value=variant.value,
            reason="experiment",
        )
        self._send_event(
            f"exp_{decision_id}",
            "exposure",
            decision_id,
            subject_id,
        )

    def _send_event(
        self, event_id, event_type, decision_id, subject_id
    ) -> None:
        # Ingest a single property-less event at the test's reference time.
        payload = {
            "event_id": event_id,
            "event_type": event_type,
            "decision_id": decision_id,
            "subject_id": subject_id,
            "timestamp": self.now.isoformat(),
            "properties": {},
        }
        process_events_batch([payload])

    def test_report_has_variant_breakdown(self) -> None:
        # One exposure per variant; only the treatment subject clicks.
        self._create_decision_and_exposure("dec_b1", "u1", self.v_control)
        self._create_decision_and_exposure("dec_b2", "u2", self.v_treatment)
        self._send_event("evt_b1", "build_clicked", "dec_b2", "u2")

        report = build_experiment_report(self.experiment)

        self.assertEqual(report["experiment_id"], self.experiment.pk)
        self.assertEqual(len(report["variants"]), 2)

        control = next(v for v in report["variants"] if v["is_control"])
        treatment = next(v for v in report["variants"] if not v["is_control"])

        for entry in (control, treatment):
            self.assertEqual(entry["exposures"], 1)
            self.assertEqual(entry["unique_subjects"], 1)

        control_cr = control["metrics"][0]
        self.assertEqual(control_cr["metric_key"], "bld_click_rate")
        self.assertEqual(control_cr["value"], Decimal(0))
        self.assertTrue(control_cr["is_primary"])

        treatment_cr = treatment["metrics"][0]
        self.assertEqual(treatment_cr["value"], Decimal(1))

    def test_report_with_period_filter(self) -> None:
        # A window around the reference time keeps the recorded exposure.
        self._create_decision_and_exposure("dec_fp1", "u1", self.v_control)
        self._send_event("evt_fp1", "build_clicked", "dec_fp1", "u1")

        window = timedelta(hours=1)
        report = build_experiment_report(
            self.experiment,
            start_date=self.now - window,
            end_date=self.now + window,
        )

        control = next(v for v in report["variants"] if v["is_control"])
        self.assertEqual(control["exposures"], 1)
        for bound in ("start", "end"):
            self.assertIsNotNone(report["period"][bound])

    def test_report_empty_experiment(self) -> None:
        # With no data every variant reports zeros and a missing value.
        report = build_experiment_report(self.experiment)

        self.assertEqual(len(report["variants"]), 2)
        for entry in report["variants"]:
            self.assertEqual(entry["exposures"], 0)
            self.assertEqual(entry["unique_subjects"], 0)
            self.assertIsNone(entry["metrics"][0]["value"])

    def test_report_shows_selected_metrics_only(self) -> None:
        # Both attached metrics, and nothing else, appear for each variant.
        m2 = metric_definition_create(
            key="bld_other_metric",
            name="Other",
            metric_type=MetricType.COUNT,
            calculation_rule={"type": "count", "event": "build_clicked"},
        )
        experiment_metric_add(experiment=self.experiment, metric=m2)

        report = build_experiment_report(self.experiment)

        for entry in report["variants"]:
            self.assertEqual(len(entry["metrics"]), 2)
            keys = {item["metric_key"] for item in entry["metrics"]}
            self.assertEqual(keys, {"bld_click_rate", "bld_other_metric"})
|
||||
Reference in New Issue
Block a user