From f8ae0798d38cde09e27cd5edf5f07adf57ed3222 Mon Sep 17 00:00:00 2001 From: ITQ Date: Fri, 21 Feb 2025 13:31:39 +0300 Subject: [PATCH] perf: improved campaigns suggestion performance by caching some things --- .../backend/api/v1/advertisers/tests.py | 23 ++- .../apps/campaign/management/__init__.py | 0 .../campaign/management/commands/__init__.py | 0 .../management/commands/init_cache.py | 35 ++++ .../services/backend/apps/campaign/models.py | 180 ++++++++++++------ .../services/backend/apps/mlscore/models.py | 12 ++ .../services/backend/apps/mlscore/tests.py | 31 ++- solution/services/backend/pyproject.toml | 1 + solution/services/backend/scripts/initdb | 2 + 9 files changed, 220 insertions(+), 64 deletions(-) create mode 100644 solution/services/backend/apps/campaign/management/__init__.py create mode 100644 solution/services/backend/apps/campaign/management/commands/__init__.py create mode 100644 solution/services/backend/apps/campaign/management/commands/init_cache.py diff --git a/solution/services/backend/api/v1/advertisers/tests.py b/solution/services/backend/api/v1/advertisers/tests.py index e17be6c..71ddf54 100644 --- a/solution/services/backend/api/v1/advertisers/tests.py +++ b/solution/services/backend/api/v1/advertisers/tests.py @@ -2,7 +2,7 @@ import json import uuid from http import HTTPStatus as status -from django.test import TestCase, Client +from django.test import TestCase, Client, override_settings from apps.advertiser.models import Advertiser from apps.client.models import Client as ClientModel from apps.mlscore.models import Mlscore @@ -21,6 +21,13 @@ class TestMlscoreEndpoint(TestCase): self.url = "/ml-scores" + @override_settings( + CACHES={ + "default": { + "BACKEND": "django.core.cache.backends.locmem.LocMemCache", + } + } + ) def test_create_mlscore_success(self): data = { "advertiser_id": str(self.advertiser.id), @@ -35,6 +42,13 @@ class TestMlscoreEndpoint(TestCase): self.assertEqual(response.status_code, status.OK) 
self.assertEqual(mlscore.score, 90) + @override_settings( + CACHES={ + "default": { + "BACKEND": "django.core.cache.backends.locmem.LocMemCache", + } + } + ) def test_update_mlscore_success(self): mlscore = Mlscore.objects.create( advertiser=self.advertiser, @@ -74,6 +88,13 @@ class TestMlscoreEndpoint(TestCase): self.assertEqual(response.status_code, status.BAD_REQUEST) + @override_settings( + CACHES={ + "default": { + "BACKEND": "django.core.cache.backends.locmem.LocMemCache", + } + } + ) def test_non_existing_client(self): data = { "advertiser_id": str(self.advertiser.id), diff --git a/solution/services/backend/apps/campaign/management/__init__.py b/solution/services/backend/apps/campaign/management/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/solution/services/backend/apps/campaign/management/commands/__init__.py b/solution/services/backend/apps/campaign/management/commands/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/solution/services/backend/apps/campaign/management/commands/init_cache.py b/solution/services/backend/apps/campaign/management/commands/init_cache.py new file mode 100644 index 0000000..2261eaa --- /dev/null +++ b/solution/services/backend/apps/campaign/management/commands/init_cache.py @@ -0,0 +1,35 @@ +from typing import Any + +from django.core.management.base import BaseCommand + +from apps.campaign.models import Campaign +from apps.mlscore.models import Mlscore + + +class Command(BaseCommand): + help = ( + "Initialize cache with current counts of " + "impressions, clicks, and ML scores." + ) + + def handle(self, *args: Any, **kwargs: Any) -> None: + for campaign in Campaign.objects.all(): + campaign.setup_cache() + self.stdout.write( + self.style.SUCCESS( + f"Initialized cache for Campaign {campaign.id}: " + f"{campaign.impressions_count} impressions, " + f"{campaign.clicks_count} clicks." 
+ ) + ) + + for mlscore in Mlscore.objects.all(): + mlscore.setup_cache() + self.stdout.write( + self.style.SUCCESS( + f"Initialized cache for MLscore: " + f"Client {mlscore.client_id}, " + f"Advertiser {mlscore.advertiser_id}, " + f"Score {mlscore.score}." + ) + ) diff --git a/solution/services/backend/apps/campaign/models.py b/solution/services/backend/apps/campaign/models.py index 40cd975..a60d5b8 100644 --- a/solution/services/backend/apps/campaign/models.py +++ b/solution/services/backend/apps/campaign/models.py @@ -1,8 +1,10 @@ -import contextlib +import random from decimal import ROUND_HALF_UP, Decimal +from logging import Logger from typing import Any, Self from uuid import UUID +from django.conf import settings from django.core.cache import cache from django.core.exceptions import ValidationError from django.core.validators import ( @@ -22,9 +24,10 @@ from apps.campaign.validators import ( ) from apps.client.models import Client from apps.core.models import BaseModel -from apps.mlscore.models import Mlscore from config.errors import ConflictError, ForbiddenError +logger: Logger = settings.LOGGER + class Campaign(BaseModel): class GenderChoices(models.TextChoices): @@ -113,6 +116,24 @@ class Campaign(BaseModel): if self.start_date < current_date: raise ValidationError(err) from None + def save(self, *args: Any, **kwargs: Any) -> None: + created = self.pk is None + + super().save(*args, **kwargs) + + if created: + self.setup_cache() + + def setup_cache(self) -> None: + cache.add( + f"campaign_{self.id}_impressions_count", self.impressions.count() + ) + cache.add(f"campaign_{self.id}_clicks_count", self.clicks.count()) + cache.set( + f"campaign_{self.id}_impressions_count", self.impressions.count() + ) + cache.set(f"campaign_{self.id}_clicks_count", self.clicks.count()) + @property def ad_id(self) -> UUID: return self.id @@ -138,32 +159,54 @@ class Campaign(BaseModel): and cache.get("current_date", default=0) <= self.end_date ) + @property + def 
impressions_count(self) -> int: + return cache.get(f"campaign_{self.id}_impressions_count", 0) + + @property + def clicks_count(self) -> int: + return cache.get(f"campaign_{self.id}_clicks_count", 0) + def view(self, client: Client) -> None: - with contextlib.suppress(ConflictError): + try: CampaignImpression.objects.create( - campaign=self, - client=client, + campaign_id=self.id, + client_id=client.id, price=self.cost_per_impression, date=cache.get("current_date", default=0), ) + try: + cache.incr(f"campaign_{self.id}_impressions_count", 1) + except ValueError: + self.setup_cache() + logger.warning( + "Seems that %s missing caches", self.campaign_id + ) + except ConflictError: + pass def click(self, client: Client) -> None: - if not self.active: - err = "Can't click on inactive campaign." - raise ForbiddenError(err) - try: CampaignImpression.objects.get(campaign=self, client=client) except CampaignImpression.DoesNotExist: raise ForbiddenError from None - with contextlib.suppress(ConflictError): + try: CampaignClick.objects.create( - campaign=self, - client=client, + campaign_id=self.id, + client_id=client.id, price=self.cost_per_click, date=cache.get("current_date", default=0), ) + try: + cache.incr(f"campaign_{self.id}_clicks_count", 1) + except ValueError: + self.setup_cache() + logger.warning( + "Seems that %s missing caches", self.campaign_id + ) + except ConflictError: + pass def get_statistics(self) -> dict[str, Any]: impressions = self.impressions.aggregate( @@ -278,69 +321,69 @@ class Campaign(BaseModel): | models.Q(age_from__isnull=True) ) & (models.Q(age_to__gte=client.age) | models.Q(age_to__isnull=True)) - return ( - cls.objects.filter( - date_filter, - location_filter, - gender_filter, - age_filter, - ) - .select_related("advertiser") - .prefetch_related("clicks", "impressions", "advertiser__mlscores") + return cls.objects.filter( + date_filter, + location_filter, + gender_filter, + age_filter, + ).only( + Campaign.id.field.name, + 
Campaign.advertiser_id.field.name, + Campaign.impressions_limit.field.name, + Campaign.clicks_limit.field.name, + Campaign.cost_per_impression.field.name, + Campaign.cost_per_click.field.name, ) @classmethod def suggest(cls, client: Client) -> Self: - base_campaigns = cls.get_available_campaigns(client) - if not base_campaigns or base_campaigns == []: + campaigns = cls.get_available_campaigns(client) + if not campaigns or campaigns == []: return None - advertiser_ids = list({c.advertiser_id for c in base_campaigns}) - ml_scores = Mlscore.objects.filter( - client=client, advertiser_id__in=advertiser_ids - ).values("advertiser_id", "score") - ml_dict = {m["advertiser_id"]: m["score"] for m in ml_scores} - - campaigns = list( - base_campaigns.annotate( - impressions_count=models.Count("impressions"), - clicks_count=models.Count("clicks"), - ) - ) campaign_ids = [c.id for c in campaigns] - client_impressions = set( - CampaignImpression.objects.filter( - client=client, campaign_id__in=campaign_ids - ).values_list("campaign_id", flat=True) - ) - client_clicks = set( - CampaignClick.objects.filter( - client=client, campaign_id__in=campaign_ids - ).values_list("campaign_id", flat=True) - ) + client_impressions = CampaignImpression.objects.filter( + client=client, campaign_id__in=campaign_ids + ).values_list("campaign_id", flat=True) + client_clicks = CampaignClick.objects.filter( + client=client, campaign_id__in=campaign_ids + ).values_list("campaign_id", flat=True) prioritized = [] ml_values = [] profit_values = [] + exceed_impressions_chance = ( # oh, can i just skip commenting this? 
+ *(0 for i in range(4)), + *(1 for i in range(1)), + ) for campaign in campaigns: - if campaign.impressions_count >= campaign.impressions_limit: - continue - - ml_score = ml_dict.get(campaign.advertiser_id, 0) - ml_values.append(ml_score) - has_impression = campaign.id in client_impressions has_click = campaign.id in client_clicks + if not has_impression: + allow_exceed_impressions = random.choice( + exceed_impressions_chance + ) + impressions_limit = round( + campaign.impressions_limit + + campaign.impressions_limit + * 0.01 + * allow_exceed_impressions + ) + if campaign.impressions_count >= impressions_limit: + continue + + ml_score = cache.get( + f"mlscore_{client.id}_{campaign.advertiser_id}", 0 + ) + ml_values.append(ml_score) + if has_impression: profit = campaign.cost_per_click if not has_click else 0 else: profit = campaign.cost_per_impression + campaign.cost_per_click - print(profit) - if profit <= 0: - continue profit_values.append(profit) @@ -364,28 +407,43 @@ class Campaign(BaseModel): ) ) - max_ml = max(ml_values) if ml_values else 1 - max_profit = max(profit_values) if profit_values else 1 - min_profit = min(profit_values) if profit_values else 0 + if not ml_values or not profit_values: + return None + + max_ml = max(ml_values) + max_profit = max(profit_values) + min_profit = min(profit_values) profit_range = ( max_profit - min_profit if max_profit != min_profit else 1 ) - print(prioritized, max_ml, max_profit, min_profit, profit_range) - final_list = [] for campaign, metrics in prioritized: norm_profit = (metrics["profit"] - min_profit) / profit_range norm_ml = metrics["ml"] / max_ml if max_ml > 0 else 0 priority = ( - 0.5 * norm_profit + 0.25 * norm_ml + 0.15 * metrics["capacity"] + 0.7 * norm_profit + 0.2 * norm_ml + 0.1 * metrics["capacity"] ) final_list.append((campaign, priority)) final_list.sort(key=lambda x: -x[1]) - return final_list[0][0] if len(final_list) >= 1 else None + + if len(final_list) != 0: + campaign = final_list[0][0] + + return 
Campaign.objects.only( + Campaign.id.field.name, + Campaign.advertiser_id.field.name, + Campaign.ad_title.field.name, + Campaign.ad_text.field.name, + Campaign.ad_image.field.name, + Campaign.cost_per_impression.field.name, + Campaign.cost_per_click.field.name, + ).get(id=campaign.id) + + return None class CampaignImpression(BaseModel): diff --git a/solution/services/backend/apps/mlscore/models.py b/solution/services/backend/apps/mlscore/models.py index 15d65ce..76ae4aa 100644 --- a/solution/services/backend/apps/mlscore/models.py +++ b/solution/services/backend/apps/mlscore/models.py @@ -1,3 +1,6 @@ +from typing import Any + +from django.core.cache import cache from django.db import models from apps.advertiser.models import Advertiser @@ -21,6 +24,15 @@ class Mlscore(BaseModel): def __str__(self) -> str: return f"{self.advertiser.name} | {self.client.login}" + def save(self, *args: Any, **kwargs: Any) -> None: + super().save(*args, **kwargs) + + self.setup_cache() + + def setup_cache(self) -> None: + cache.add(f"mlscore_{self.client_id}_{self.advertiser_id}", self.score) + cache.set(f"mlscore_{self.client_id}_{self.advertiser_id}", self.score) + class Meta: unique_together = ( "advertiser", diff --git a/solution/services/backend/apps/mlscore/tests.py b/solution/services/backend/apps/mlscore/tests.py index dc1dd07..8739e9f 100644 --- a/solution/services/backend/apps/mlscore/tests.py +++ b/solution/services/backend/apps/mlscore/tests.py @@ -1,5 +1,4 @@ -from django.test import TestCase -from django.db.utils import IntegrityError +from django.test import TestCase, override_settings from django.core.exceptions import ValidationError from config.errors import ConflictError from apps.advertiser.models import Advertiser @@ -17,6 +16,13 @@ class MlscoreModelTest(TestCase): gender=Client.GenderChoices.MALE, ) + @override_settings( + CACHES={ + "default": { + "BACKEND": "django.core.cache.backends.locmem.LocMemCache", + } + } + ) def test_create_mlscore(self): mlscore = 
Mlscore.objects.create( advertiser=self.advertiser, @@ -27,6 +33,13 @@ class MlscoreModelTest(TestCase): self.assertEqual(mlscore.score, 95) self.assertEqual(str(mlscore), "Test Advertiser | test_client") + @override_settings( + CACHES={ + "default": { + "BACKEND": "django.core.cache.backends.locmem.LocMemCache", + } + } + ) def test_mlscore_unique_together_constraint(self): Mlscore.objects.create( advertiser=self.advertiser, @@ -41,6 +54,13 @@ class MlscoreModelTest(TestCase): score=85, ) + @override_settings( + CACHES={ + "default": { + "BACKEND": "django.core.cache.backends.locmem.LocMemCache", + } + } + ) def test_delete_advertiser_cascades(self): mlscore = Mlscore.objects.create( advertiser=self.advertiser, @@ -51,6 +71,13 @@ class MlscoreModelTest(TestCase): self.assertFalse(Mlscore.objects.filter(id=mlscore.id).exists()) + @override_settings( + CACHES={ + "default": { + "BACKEND": "django.core.cache.backends.locmem.LocMemCache", + } + } + ) def test_delete_client_cascades(self): mlscore = Mlscore.objects.create( advertiser=self.advertiser, diff --git a/solution/services/backend/pyproject.toml b/solution/services/backend/pyproject.toml index a72da2a..5ee2ff5 100644 --- a/solution/services/backend/pyproject.toml +++ b/solution/services/backend/pyproject.toml @@ -122,6 +122,7 @@ ignore = [ "PT009", "PT027", "RUF001", + "S311", ] logger-objects = [] per-file-ignores = {} diff --git a/solution/services/backend/scripts/initdb b/solution/services/backend/scripts/initdb index f2d64eb..f9488d1 100755 --- a/solution/services/backend/scripts/initdb +++ b/solution/services/backend/scripts/initdb @@ -9,3 +9,5 @@ fi if [ "$DJANGO_CREATE_SUPERUSER" = "True" ]; then python manage.py createsuperuser --noinput --username "$DJANGO_SUPERUSER_USERNAME" --email "$DJANGO_SUPERUSER_EMAIL" || true fi + +python manage.py init_cache