feat(): optimize queries by adding indexes, defining vector length and defining ef-search local parameter

This commit is contained in:
gitgernit
2025-11-23 12:19:22 +03:00
parent b15282baef
commit da4c8f486d
4 changed files with 58 additions and 3 deletions
@@ -3,3 +3,4 @@ CREATE DATABASE app;
\c app; \c app;
CREATE EXTENSION IF NOT EXISTS vector; CREATE EXTENSION IF NOT EXISTS vector;
CREATE EXTENSION IF NOT EXISTS pg_trgm;
@@ -8,6 +8,7 @@ from sqlalchemy import (
DateTime, DateTime,
Enum, Enum,
ForeignKey, ForeignKey,
Index,
Integer, Integer,
MetaData, MetaData,
Numeric, Numeric,
@@ -178,7 +179,7 @@ resume_embedding_table: Final = Table(
Column("deleted_at", DateTime(timezone=True)), Column("deleted_at", DateTime(timezone=True)),
Column("created_at", DateTime(timezone=True), nullable=False), Column("created_at", DateTime(timezone=True), nullable=False),
Column("resume_id", UUID, ForeignKey("resume.id", ondelete="CASCADE"), nullable=False), Column("resume_id", UUID, ForeignKey("resume.id", ondelete="CASCADE"), nullable=False),
Column("vector", Vector, nullable=False), Column("vector", Vector(384), nullable=False),
) )
resume_prediction_table: Final = Table( resume_prediction_table: Final = Table(
"resume_prediction", "resume_prediction",
@@ -255,7 +256,22 @@ vacancy_embedding_table: Final = Table(
Column("deleted_at", DateTime(timezone=True)), Column("deleted_at", DateTime(timezone=True)),
Column("created_at", DateTime(timezone=True), nullable=False), Column("created_at", DateTime(timezone=True), nullable=False),
Column("vacancy_id", UUID, ForeignKey("vacancy.id", ondelete="CASCADE"), nullable=False), Column("vacancy_id", UUID, ForeignKey("vacancy.id", ondelete="CASCADE"), nullable=False),
Column("vector", Vector, nullable=False), Column("vector", Vector(384), nullable=False),
)
# GIN index over key_skills.name built with the gin_trgm_ops operator class
# (that operator class is supplied by the pg_trgm PostgreSQL extension).
# NOTE(review): presumably backs fuzzy / substring matching on skill names;
# confirm against the queries that filter on key_skills.name.
key_skills_name_trgm_index: Final = Index(
"ix_key_skills_name_trgm",
key_skills_table.c.name,
postgresql_using="gin",
postgresql_ops={"name": "gin_trgm_ops"},
)
# HNSW index over vacancy_embedding.vector using the vector_cosine_ops
# operator class, i.e. it is keyed on cosine distance.
# "m" and "ef_construction" are passed through as index storage parameters
# (WITH (...)); they are build-time settings of the HNSW graph.
# NOTE(review): the values 32 / 256 are tuning choices — confirm they were
# benchmarked against the expected data volume.
vacancy_embedding_vector_cosine_index: Final = Index(
"ix_vacancy_embedding_vector_cosine",
vacancy_embedding_table.c.vector,
postgresql_using="hnsw",
postgresql_ops={"vector": "vector_cosine_ops"},
postgresql_with={"m": 32, "ef_construction": 256},
) )
@@ -1,7 +1,7 @@
from collections.abc import Sequence from collections.abc import Sequence
from typing import override from typing import override
from sqlalchemy import label, select from sqlalchemy import label, select, text
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from template_project.adapters.data_gateways.tables import vacancy_embedding_table, vacancy_table from template_project.adapters.data_gateways.tables import vacancy_embedding_table, vacancy_table
@@ -16,6 +16,7 @@ class DefaultVacancyDataGateway(VacancyDataGateway):
@override @override
async def get_suitable(self, vector: list[float]) -> Sequence[SuitableVacancy]: async def get_suitable(self, vector: list[float]) -> Sequence[SuitableVacancy]:
await self._session.execute(text("SET LOCAL hnsw.ef_search = 128"))
distance_expr = vacancy_embedding_table.c.vector.cosine_distance(vector) distance_expr = vacancy_embedding_table.c.vector.cosine_distance(vector)
similarity_expr = 1 - distance_expr similarity_expr = 1 - distance_expr
statement = ( statement = (
@@ -0,0 +1,37 @@
"""Add trigram and HNSW search indexes; fix embedding vectors at 384 dimensions.

Enables the pg_trgm extension, constrains the ``vector`` columns of
``vacancy_embedding`` and ``resume_embedding`` to ``vector(384)``, and creates
a GIN trigram index on ``key_skills.name`` plus an HNSW cosine index on
``vacancy_embedding.vector``.

Revision ID: 2c99db38e99b
Revises: 9a32674539dd
Create Date: 2025-11-23 12:08:50.774495
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = '2c99db38e99b'
down_revision: Union[str, Sequence[str], None] = '9a32674539dd'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Apply this revision's schema changes.

    Runs three raw SQL statements (install ``pg_trgm`` if missing, then
    constrain both embedding columns to ``vector(384)``) and afterwards
    creates the GIN trigram index on ``key_skills.name`` and the HNSW cosine
    index on ``vacancy_embedding.vector``.
    """
    # Raw SQL runs before the index builds: gin_trgm_ops comes from pg_trgm,
    # and the column types are fixed before the HNSW index is created.
    preparatory_sql = (
        "CREATE EXTENSION IF NOT EXISTS pg_trgm",
        "ALTER TABLE vacancy_embedding ALTER COLUMN vector TYPE vector(384)",
        "ALTER TABLE resume_embedding ALTER COLUMN vector TYPE vector(384)",
    )
    for sql in preparatory_sql:
        op.execute(sql)

    # Trigram index for key_skills.name.
    op.create_index(
        "ix_key_skills_name_trgm",
        "key_skills",
        ["name"],
        unique=False,
        postgresql_using="gin",
        postgresql_ops={"name": "gin_trgm_ops"},
    )

    # HNSW cosine-distance index for vacancy embeddings.
    op.create_index(
        "ix_vacancy_embedding_vector_cosine",
        "vacancy_embedding",
        ["vector"],
        unique=False,
        postgresql_using="hnsw",
        postgresql_ops={"vector": "vector_cosine_ops"},
        postgresql_with={"m": 32, "ef_construction": 256},
    )
def downgrade() -> None:
    """Revert the schema changes applied by ``upgrade``.

    Drops the HNSW and trigram indexes, then restores the ``vector`` columns
    of ``vacancy_embedding`` and ``resume_embedding`` to the unconstrained
    ``vector`` type they had before this revision.

    The ``pg_trgm`` extension is intentionally left installed: ``upgrade``
    only creates it ``IF NOT EXISTS``, so it may pre-date this revision and
    be relied on elsewhere.
    """
    # Indexes go first so no index depends on the columns being retyped.
    op.drop_index(
        "ix_vacancy_embedding_vector_cosine",
        table_name="vacancy_embedding",
        postgresql_using="hnsw",
        postgresql_ops={"vector": "vector_cosine_ops"},
        postgresql_with={"m": 32, "ef_construction": 256},
    )
    op.drop_index(
        "ix_key_skills_name_trgm",
        table_name="key_skills",
        postgresql_using="gin",
        postgresql_ops={"name": "gin_trgm_ops"},
    )

    # Undo the dimension constraint in the reverse order of upgrade().
    op.execute("ALTER TABLE resume_embedding ALTER COLUMN vector TYPE vector")
    op.execute("ALTER TABLE vacancy_embedding ALTER COLUMN vector TYPE vector")