perf: improved campaign suggestion performance by caching some things

This commit is contained in:
ITQ
2025-02-21 13:31:39 +03:00
parent 9d92bbdc68
commit f8ae0798d3
9 changed files with 220 additions and 64 deletions
@@ -2,7 +2,7 @@ import json
import uuid import uuid
from http import HTTPStatus as status from http import HTTPStatus as status
from django.test import TestCase, Client from django.test import TestCase, Client, override_settings
from apps.advertiser.models import Advertiser from apps.advertiser.models import Advertiser
from apps.client.models import Client as ClientModel from apps.client.models import Client as ClientModel
from apps.mlscore.models import Mlscore from apps.mlscore.models import Mlscore
@@ -21,6 +21,13 @@ class TestMlscoreEndpoint(TestCase):
self.url = "/ml-scores" self.url = "/ml-scores"
@override_settings(
CACHES={
"default": {
"BACKEND": "django.core.cache.backends.locmem.LocMemCache",
}
}
)
def test_create_mlscore_success(self): def test_create_mlscore_success(self):
data = { data = {
"advertiser_id": str(self.advertiser.id), "advertiser_id": str(self.advertiser.id),
@@ -35,6 +42,13 @@ class TestMlscoreEndpoint(TestCase):
self.assertEqual(response.status_code, status.OK) self.assertEqual(response.status_code, status.OK)
self.assertEqual(mlscore.score, 90) self.assertEqual(mlscore.score, 90)
@override_settings(
CACHES={
"default": {
"BACKEND": "django.core.cache.backends.locmem.LocMemCache",
}
}
)
def test_update_mlscore_success(self): def test_update_mlscore_success(self):
mlscore = Mlscore.objects.create( mlscore = Mlscore.objects.create(
advertiser=self.advertiser, advertiser=self.advertiser,
@@ -74,6 +88,13 @@ class TestMlscoreEndpoint(TestCase):
self.assertEqual(response.status_code, status.BAD_REQUEST) self.assertEqual(response.status_code, status.BAD_REQUEST)
@override_settings(
CACHES={
"default": {
"BACKEND": "django.core.cache.backends.locmem.LocMemCache",
}
}
)
def test_non_existing_client(self): def test_non_existing_client(self):
data = { data = {
"advertiser_id": str(self.advertiser.id), "advertiser_id": str(self.advertiser.id),
@@ -0,0 +1,35 @@
from typing import Any
from django.core.management.base import BaseCommand
from apps.campaign.models import Campaign
from apps.mlscore.models import Mlscore
class Command(BaseCommand):
    """Management command that fills the cache from the current database rows.

    Calls ``setup_cache()`` on every ``Campaign`` and then on every
    ``Mlscore``, writing one success line to stdout per object.
    """

    help = (
        "Initialize cache with current counts of "
        "impressions, clicks, and ML scores."
    )

    def handle(self, *args: Any, **kwargs: Any) -> None:
        """Warm the campaign counters first, then the ML scores."""
        self._warm_campaigns()
        self._warm_mlscores()

    def _report(self, message: str) -> None:
        """Write ``message`` to stdout using the SUCCESS style."""
        self.stdout.write(self.style.SUCCESS(message))

    def _warm_campaigns(self) -> None:
        """Cache impression/click counters for every campaign."""
        for campaign in Campaign.objects.all():
            campaign.setup_cache()
            self._report(
                f"Initialized cache for Campaign {campaign.id}: "
                f"{campaign.impressions_count} impressions, "
                f"{campaign.clicks_count} clicks."
            )

    def _warm_mlscores(self) -> None:
        """Cache every stored ML score."""
        for mlscore in Mlscore.objects.all():
            mlscore.setup_cache()
            self._report(
                "Initialized cache for MLscore: "
                f"Client {mlscore.client_id}, "
                f"Advertiser {mlscore.advertiser_id}, "
                f"Score {mlscore.score}."
            )
+119 -61
View File
@@ -1,8 +1,10 @@
import contextlib import random
from decimal import ROUND_HALF_UP, Decimal from decimal import ROUND_HALF_UP, Decimal
from logging import Logger
from typing import Any, Self from typing import Any, Self
from uuid import UUID from uuid import UUID
from django.conf import settings
from django.core.cache import cache from django.core.cache import cache
from django.core.exceptions import ValidationError from django.core.exceptions import ValidationError
from django.core.validators import ( from django.core.validators import (
@@ -22,9 +24,10 @@ from apps.campaign.validators import (
) )
from apps.client.models import Client from apps.client.models import Client
from apps.core.models import BaseModel from apps.core.models import BaseModel
from apps.mlscore.models import Mlscore
from config.errors import ConflictError, ForbiddenError from config.errors import ConflictError, ForbiddenError
logger: Logger = settings.LOGGER
class Campaign(BaseModel): class Campaign(BaseModel):
class GenderChoices(models.TextChoices): class GenderChoices(models.TextChoices):
@@ -113,6 +116,24 @@ class Campaign(BaseModel):
if self.start_date < current_date: if self.start_date < current_date:
raise ValidationError(err) from None raise ValidationError(err) from None
def save(self, *args: Any, **kwargs: Any) -> None:
    """Persist the campaign and seed its cache counters on first insert.

    Args:
        *args: Forwarded unchanged to the parent ``save``.
        **kwargs: Forwarded unchanged to the parent ``save``.
    """
    # ``_state.adding`` stays True until the row has been written. Testing
    # ``self.pk is None`` instead misses primary keys that are generated
    # on the Python side before the first save.
    # NOTE(review): BaseModel is not visible here; ``ad_id`` is annotated
    # as returning a UUID, which suggests such a key - confirm.
    is_new = self._state.adding
    super().save(*args, **kwargs)
    if is_new:
        self.setup_cache()
def setup_cache(self) -> None:
    """Overwrite the cached impression and click counters from the database.

    Each related set is counted once and both keys are written in a
    single ``set_many`` call. An unconditional write makes a preceding
    ``cache.add`` on the same keys unnecessary.
    """
    # NOTE(review): entries use the cache backend's default timeout -
    # confirm it is configured so the counters do not expire unexpectedly.
    impressions_total = self.impressions.count()
    clicks_total = self.clicks.count()
    cache.set_many(
        {
            f"campaign_{self.id}_impressions_count": impressions_total,
            f"campaign_{self.id}_clicks_count": clicks_total,
        }
    )
@property @property
def ad_id(self) -> UUID: def ad_id(self) -> UUID:
return self.id return self.id
@@ -138,32 +159,54 @@ class Campaign(BaseModel):
and cache.get("current_date", default=0) <= self.end_date and cache.get("current_date", default=0) <= self.end_date
) )
@property
def impressions_count(self) -> int:
    """Return the number of impressions recorded for this campaign.

    The cached counter is used when present. On a cache miss the value
    is counted from the database instead of being reported as 0, so an
    expired or evicted key cannot make the campaign look unseen.
    """
    key = f"campaign_{self.id}_impressions_count"
    count = cache.get(key)
    if count is None:
        count = self.impressions.count()
        # ``add`` leaves the key alone if another writer stored it first.
        cache.add(key, count)
    return count
@property
def clicks_count(self) -> int:
    """Return the number of clicks recorded for this campaign.

    The cached counter is used when present. On a cache miss the value
    is counted from the database instead of being reported as 0, so an
    expired or evicted key cannot hide existing clicks.
    """
    key = f"campaign_{self.id}_clicks_count"
    count = cache.get(key)
    if count is None:
        count = self.clicks.count()
        # ``add`` leaves the key alone if another writer stored it first.
        cache.add(key, count)
    return count
def view(self, client: Client) -> None:
    """Record an impression of this campaign by ``client``.

    Creates a ``CampaignImpression`` priced at ``cost_per_impression`` and
    dated with the cached ``current_date`` (0 when unset), then bumps the
    cached impressions counter.

    Args:
        client: The client the ad was shown to.
    """
    try:
        CampaignImpression.objects.create(
            campaign_id=self.id,
            client_id=client.id,
            price=self.cost_per_impression,
            date=cache.get("current_date", default=0),
        )
    except ConflictError:
        # Presumably raised for a duplicate impression (confirm against
        # BaseModel); nothing new was stored, so the counter must not move.
        return

    try:
        cache.incr(f"campaign_{self.id}_impressions_count", 1)
    except ValueError:
        # ``incr`` raises ValueError when the key is missing: rebuild the
        # counters from the database, which already holds the new row.
        self.setup_cache()
        # Log ``self.id``, the same identifier the cache keys use;
        # ``campaign_id`` is not defined in the visible part of the class.
        logger.warning("Seems that %s missing caches", self.id)
def click(self, client: Client) -> None:
    """Record a click on this campaign by ``client``.

    A click is only accepted from a client that already has an impression
    of this campaign. The click is priced at ``cost_per_click`` and dated
    with the cached ``current_date`` (0 when unset); afterwards the cached
    clicks counter is bumped.

    Args:
        client: The client who clicked the ad.

    Raises:
        ForbiddenError: If ``client`` has no impression of this campaign.
    """
    # Only existence matters, so do not fetch the impression row itself.
    was_shown = CampaignImpression.objects.filter(
        campaign=self, client=client
    ).exists()
    if not was_shown:
        raise ForbiddenError

    try:
        CampaignClick.objects.create(
            campaign_id=self.id,
            client_id=client.id,
            price=self.cost_per_click,
            date=cache.get("current_date", default=0),
        )
    except ConflictError:
        # Presumably raised for a duplicate click (confirm against
        # BaseModel); nothing new was stored, so the counter must not move.
        return

    try:
        cache.incr(f"campaign_{self.id}_clicks_count", 1)
    except ValueError:
        # ``incr`` raises ValueError when the key is missing: rebuild the
        # counters from the database, which already holds the new row.
        self.setup_cache()
        # Log ``self.id``, the same identifier the cache keys use;
        # ``campaign_id`` is not defined in the visible part of the class.
        logger.warning("Seems that %s missing caches", self.id)
def get_statistics(self) -> dict[str, Any]: def get_statistics(self) -> dict[str, Any]:
impressions = self.impressions.aggregate( impressions = self.impressions.aggregate(
@@ -278,69 +321,69 @@ class Campaign(BaseModel):
| models.Q(age_from__isnull=True) | models.Q(age_from__isnull=True)
) & (models.Q(age_to__gte=client.age) | models.Q(age_to__isnull=True)) ) & (models.Q(age_to__gte=client.age) | models.Q(age_to__isnull=True))
return ( return cls.objects.filter(
cls.objects.filter( date_filter,
date_filter, location_filter,
location_filter, gender_filter,
gender_filter, age_filter,
age_filter, ).only(
) Campaign.id.field.name,
.select_related("advertiser") Campaign.advertiser_id.field.name,
.prefetch_related("clicks", "impressions", "advertiser__mlscores") Campaign.impressions_limit.field.name,
Campaign.clicks_limit.field.name,
Campaign.cost_per_impression.field.name,
Campaign.cost_per_click.field.name,
) )
@classmethod @classmethod
def suggest(cls, client: Client) -> Self: def suggest(cls, client: Client) -> Self:
base_campaigns = cls.get_available_campaigns(client) campaigns = cls.get_available_campaigns(client)
if not base_campaigns or base_campaigns == []: if not campaigns or campaigns == []:
return None return None
advertiser_ids = list({c.advertiser_id for c in base_campaigns})
ml_scores = Mlscore.objects.filter(
client=client, advertiser_id__in=advertiser_ids
).values("advertiser_id", "score")
ml_dict = {m["advertiser_id"]: m["score"] for m in ml_scores}
campaigns = list(
base_campaigns.annotate(
impressions_count=models.Count("impressions"),
clicks_count=models.Count("clicks"),
)
)
campaign_ids = [c.id for c in campaigns] campaign_ids = [c.id for c in campaigns]
client_impressions = set( client_impressions = CampaignImpression.objects.filter(
CampaignImpression.objects.filter( client=client, campaign_id__in=campaign_ids
client=client, campaign_id__in=campaign_ids ).values_list("campaign_id", flat=True)
).values_list("campaign_id", flat=True) client_clicks = CampaignClick.objects.filter(
) client=client, campaign_id__in=campaign_ids
client_clicks = set( ).values_list("campaign_id", flat=True)
CampaignClick.objects.filter(
client=client, campaign_id__in=campaign_ids
).values_list("campaign_id", flat=True)
)
prioritized = [] prioritized = []
ml_values = [] ml_values = []
profit_values = [] profit_values = []
exceed_impressions_chance = ( # oh, can i just skip commenting this?
*(0 for i in range(4)),
*(1 for i in range(1)),
)
for campaign in campaigns: for campaign in campaigns:
if campaign.impressions_count >= campaign.impressions_limit:
continue
ml_score = ml_dict.get(campaign.advertiser_id, 0)
ml_values.append(ml_score)
has_impression = campaign.id in client_impressions has_impression = campaign.id in client_impressions
has_click = campaign.id in client_clicks has_click = campaign.id in client_clicks
if not has_impression:
allow_exceed_impressions = random.choice(
exceed_impressions_chance
)
impressions_limit = round(
campaign.impressions_limit
+ campaign.impressions_limit
* 0.01
* allow_exceed_impressions
)
if campaign.impressions_count >= impressions_limit:
continue
ml_score = cache.get(
f"mlscore_{client.id}_{campaign.advertiser_id}", 0
)
ml_values.append(ml_score)
if has_impression: if has_impression:
profit = campaign.cost_per_click if not has_click else 0 profit = campaign.cost_per_click if not has_click else 0
else: else:
profit = campaign.cost_per_impression + campaign.cost_per_click profit = campaign.cost_per_impression + campaign.cost_per_click
print(profit)
if profit <= 0:
continue
profit_values.append(profit) profit_values.append(profit)
@@ -364,28 +407,43 @@ class Campaign(BaseModel):
) )
) )
max_ml = max(ml_values) if ml_values else 1 if not ml_values or not profit_values:
max_profit = max(profit_values) if profit_values else 1 return None
min_profit = min(profit_values) if profit_values else 0
max_ml = max(ml_values)
max_profit = max(profit_values)
min_profit = min(profit_values)
profit_range = ( profit_range = (
max_profit - min_profit if max_profit != min_profit else 1 max_profit - min_profit if max_profit != min_profit else 1
) )
print(prioritized, max_ml, max_profit, min_profit, profit_range)
final_list = [] final_list = []
for campaign, metrics in prioritized: for campaign, metrics in prioritized:
norm_profit = (metrics["profit"] - min_profit) / profit_range norm_profit = (metrics["profit"] - min_profit) / profit_range
norm_ml = metrics["ml"] / max_ml if max_ml > 0 else 0 norm_ml = metrics["ml"] / max_ml if max_ml > 0 else 0
priority = ( priority = (
0.5 * norm_profit + 0.25 * norm_ml + 0.15 * metrics["capacity"] 0.7 * norm_profit + 0.2 * norm_ml + 0.1 * metrics["capacity"]
) )
final_list.append((campaign, priority)) final_list.append((campaign, priority))
final_list.sort(key=lambda x: -x[1]) final_list.sort(key=lambda x: -x[1])
return final_list[0][0] if len(final_list) >= 1 else None
if len(final_list) != 0:
campaign = final_list[0][0]
return Campaign.objects.only(
Campaign.id.field.name,
Campaign.advertiser_id.field.name,
Campaign.ad_title.field.name,
Campaign.ad_text.field.name,
Campaign.ad_image.field.name,
Campaign.cost_per_impression.field.name,
Campaign.cost_per_click.field.name,
).get(id=campaign.id)
return None
class CampaignImpression(BaseModel): class CampaignImpression(BaseModel):
@@ -1,3 +1,6 @@
from typing import Any
from django.core.cache import cache
from django.db import models from django.db import models
from apps.advertiser.models import Advertiser from apps.advertiser.models import Advertiser
@@ -21,6 +24,15 @@ class Mlscore(BaseModel):
def __str__(self) -> str: def __str__(self) -> str:
return f"{self.advertiser.name} | {self.client.login}" return f"{self.advertiser.name} | {self.client.login}"
def save(self, *args: Any, **kwargs: Any) -> None:
    """Persist the score, then mirror it into the cache.

    Runs on every save, so updates to an existing score refresh the
    cached value as well.
    """
    super().save(*args, **kwargs)
    self.setup_cache()
def setup_cache(self) -> None:
    """Store this score in the cache, keyed by client and advertiser id.

    The write is unconditional, so a preceding ``cache.add`` on the same
    key is unnecessary.
    """
    # NOTE(review): the entry uses the cache backend's default timeout -
    # confirm it is configured so readers do not see a missing score.
    cache.set(f"mlscore_{self.client_id}_{self.advertiser_id}", self.score)
class Meta: class Meta:
unique_together = ( unique_together = (
"advertiser", "advertiser",
@@ -1,5 +1,4 @@
from django.test import TestCase from django.test import TestCase, override_settings
from django.db.utils import IntegrityError
from django.core.exceptions import ValidationError from django.core.exceptions import ValidationError
from config.errors import ConflictError from config.errors import ConflictError
from apps.advertiser.models import Advertiser from apps.advertiser.models import Advertiser
@@ -17,6 +16,13 @@ class MlscoreModelTest(TestCase):
gender=Client.GenderChoices.MALE, gender=Client.GenderChoices.MALE,
) )
@override_settings(
CACHES={
"default": {
"BACKEND": "django.core.cache.backends.locmem.LocMemCache",
}
}
)
def test_create_mlscore(self): def test_create_mlscore(self):
mlscore = Mlscore.objects.create( mlscore = Mlscore.objects.create(
advertiser=self.advertiser, advertiser=self.advertiser,
@@ -27,6 +33,13 @@ class MlscoreModelTest(TestCase):
self.assertEqual(mlscore.score, 95) self.assertEqual(mlscore.score, 95)
self.assertEqual(str(mlscore), "Test Advertiser | test_client") self.assertEqual(str(mlscore), "Test Advertiser | test_client")
@override_settings(
CACHES={
"default": {
"BACKEND": "django.core.cache.backends.locmem.LocMemCache",
}
}
)
def test_mlscore_unique_together_constraint(self): def test_mlscore_unique_together_constraint(self):
Mlscore.objects.create( Mlscore.objects.create(
advertiser=self.advertiser, advertiser=self.advertiser,
@@ -41,6 +54,13 @@ class MlscoreModelTest(TestCase):
score=85, score=85,
) )
@override_settings(
CACHES={
"default": {
"BACKEND": "django.core.cache.backends.locmem.LocMemCache",
}
}
)
def test_delete_advertiser_cascades(self): def test_delete_advertiser_cascades(self):
mlscore = Mlscore.objects.create( mlscore = Mlscore.objects.create(
advertiser=self.advertiser, advertiser=self.advertiser,
@@ -51,6 +71,13 @@ class MlscoreModelTest(TestCase):
self.assertFalse(Mlscore.objects.filter(id=mlscore.id).exists()) self.assertFalse(Mlscore.objects.filter(id=mlscore.id).exists())
@override_settings(
CACHES={
"default": {
"BACKEND": "django.core.cache.backends.locmem.LocMemCache",
}
}
)
def test_delete_client_cascades(self): def test_delete_client_cascades(self):
mlscore = Mlscore.objects.create( mlscore = Mlscore.objects.create(
advertiser=self.advertiser, advertiser=self.advertiser,
+1
View File
@@ -122,6 +122,7 @@ ignore = [
"PT009", "PT009",
"PT027", "PT027",
"RUF001", "RUF001",
"S311",
] ]
logger-objects = [] logger-objects = []
per-file-ignores = {} per-file-ignores = {}
+2
View File
@@ -9,3 +9,5 @@ fi
if [ "$DJANGO_CREATE_SUPERUSER" = "True" ]; then if [ "$DJANGO_CREATE_SUPERUSER" = "True" ]; then
python manage.py createsuperuser --noinput --username "$DJANGO_SUPERUSER_USERNAME" --email "$DJANGO_SUPERUSER_EMAIL" || true python manage.py createsuperuser --noinput --username "$DJANGO_SUPERUSER_USERNAME" --email "$DJANGO_SUPERUSER_EMAIL" || true
fi fi
python manage.py init_cache