feat(): optimize queries by adding indexes, defining vector length and defining ef-search local parameter

This commit is contained in:
gitgernit
2025-11-23 12:19:22 +03:00
parent b15282baef
commit da4c8f486d
4 changed files with 58 additions and 3 deletions
@@ -8,6 +8,7 @@ from sqlalchemy import (
DateTime,
Enum,
ForeignKey,
Index,
Integer,
MetaData,
Numeric,
@@ -178,7 +179,7 @@ resume_embedding_table: Final = Table(
Column("deleted_at", DateTime(timezone=True)),
Column("created_at", DateTime(timezone=True), nullable=False),
Column("resume_id", UUID, ForeignKey("resume.id", ondelete="CASCADE"), nullable=False),
Column("vector", Vector, nullable=False),
Column("vector", Vector(384), nullable=False),
)
resume_prediction_table: Final = Table(
"resume_prediction",
@@ -255,7 +256,22 @@ vacancy_embedding_table: Final = Table(
Column("deleted_at", DateTime(timezone=True)),
Column("created_at", DateTime(timezone=True), nullable=False),
Column("vacancy_id", UUID, ForeignKey("vacancy.id", ondelete="CASCADE"), nullable=False),
Column("vector", Vector, nullable=False),
Column("vector", Vector(384), nullable=False),
)
# GIN index over key_skills.name built with the gin_trgm_ops operator class.
# That operator class comes from the pg_trgm extension, so the extension must
# be installed in the database before this index can be created.
key_skills_name_trgm_index: Final = Index(
"ix_key_skills_name_trgm",
key_skills_table.c.name,
postgresql_using="gin",
# Keyed by column name: use the trigram operator class for "name".
postgresql_ops={"name": "gin_trgm_ops"},
)
# HNSW index over vacancy_embedding.vector using the vector_cosine_ops
# operator class.
# NOTE(review): presumably meant to serve cosine-distance ordering queries on
# this column — confirm against the query code that reads it.
vacancy_embedding_vector_cosine_index: Final = Index(
"ix_vacancy_embedding_vector_cosine",
vacancy_embedding_table.c.vector,
postgresql_using="hnsw",
# Keyed by column name: use the cosine operator class for "vector".
postgresql_ops={"vector": "vector_cosine_ops"},
# Index storage parameters (emitted in the index's WITH clause).
postgresql_with={"m": 32, "ef_construction": 256},
)
@@ -1,7 +1,7 @@
from collections.abc import Sequence
from typing import override
from sqlalchemy import label, select
from sqlalchemy import label, select, text
from sqlalchemy.ext.asyncio import AsyncSession
from template_project.adapters.data_gateways.tables import vacancy_embedding_table, vacancy_table
@@ -16,6 +16,7 @@ class DefaultVacancyDataGateway(VacancyDataGateway):
@override
async def get_suitable(self, vector: list[float]) -> Sequence[SuitableVacancy]:
await self._session.execute(text("SET LOCAL hnsw.ef_search = 128"))
distance_expr = vacancy_embedding_table.c.vector.cosine_distance(vector)
similarity_expr = 1 - distance_expr
statement = (
@@ -0,0 +1,37 @@
"""empty message
Revision ID: 2c99db38e99b
Revises: 9a32674539dd
Create Date: 2025-11-23 12:08:50.774495
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = '2c99db38e99b'
down_revision: Union[str, Sequence[str], None] = '9a32674539dd'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Apply this revision.

    Runs, in order: installs pg_trgm if missing, pins the ``vector`` columns
    of ``vacancy_embedding`` and ``resume_embedding`` to 384 dimensions, then
    creates the trigram GIN index on ``key_skills.name`` and the HNSW cosine
    index on ``vacancy_embedding.vector``.
    """
    # Raw SQL that has to run before the indexes are created.
    preparatory_statements = (
        "CREATE EXTENSION IF NOT EXISTS pg_trgm",
        "ALTER TABLE vacancy_embedding ALTER COLUMN vector TYPE vector(384)",
        "ALTER TABLE resume_embedding ALTER COLUMN vector TYPE vector(384)",
    )
    for statement in preparatory_statements:
        op.execute(statement)

    op.create_index(
        'ix_key_skills_name_trgm',
        'key_skills',
        ['name'],
        unique=False,
        postgresql_using='gin',
        postgresql_ops={'name': 'gin_trgm_ops'},
    )
    op.create_index(
        'ix_vacancy_embedding_vector_cosine',
        'vacancy_embedding',
        ['vector'],
        unique=False,
        postgresql_using='hnsw',
        postgresql_ops={'vector': 'vector_cosine_ops'},
        postgresql_with={'m': 32, 'ef_construction': 256},
    )
def downgrade() -> None:
    """Revert this revision.

    Drops the two indexes created by ``upgrade()`` and then removes the
    384-dimension constraint that ``upgrade()`` put on the ``vector`` columns
    of ``vacancy_embedding`` and ``resume_embedding``, so the schema matches
    the previous revision again.

    The pg_trgm extension is intentionally left installed: ``upgrade()`` only
    creates it when missing, so it may pre-date this revision or be used by
    other objects.
    """
    # Indexes go first, in reverse order of creation.
    op.drop_index(
        'ix_vacancy_embedding_vector_cosine',
        table_name='vacancy_embedding',
        postgresql_using='hnsw',
        postgresql_ops={'vector': 'vector_cosine_ops'},
        postgresql_with={'m': 32, 'ef_construction': 256},
    )
    op.drop_index(
        'ix_key_skills_name_trgm',
        table_name='key_skills',
        postgresql_using='gin',
        postgresql_ops={'name': 'gin_trgm_ops'},
    )

    # Undo the column type changes made by upgrade(): back to an
    # unconstrained-dimension vector, in reverse order of the upgrade.
    op.execute("ALTER TABLE resume_embedding ALTER COLUMN vector TYPE vector")
    op.execute("ALTER TABLE vacancy_embedding ALTER COLUMN vector TYPE vector")