You've already forked RekomenciBackend
feat(): optimize queries by adding indexes, defining vector length and defining ef-search local parameter
This commit is contained in:
@@ -8,6 +8,7 @@ from sqlalchemy import (
|
||||
DateTime,
|
||||
Enum,
|
||||
ForeignKey,
|
||||
Index,
|
||||
Integer,
|
||||
MetaData,
|
||||
Numeric,
|
||||
@@ -178,7 +179,7 @@ resume_embedding_table: Final = Table(
|
||||
Column("deleted_at", DateTime(timezone=True)),
|
||||
Column("created_at", DateTime(timezone=True), nullable=False),
|
||||
Column("resume_id", UUID, ForeignKey("resume.id", ondelete="CASCADE"), nullable=False),
|
||||
Column("vector", Vector, nullable=False),
|
||||
Column("vector", Vector(384), nullable=False),
|
||||
)
|
||||
resume_prediction_table: Final = Table(
|
||||
"resume_prediction",
|
||||
@@ -255,7 +256,22 @@ vacancy_embedding_table: Final = Table(
|
||||
Column("deleted_at", DateTime(timezone=True)),
|
||||
Column("created_at", DateTime(timezone=True), nullable=False),
|
||||
Column("vacancy_id", UUID, ForeignKey("vacancy.id", ondelete="CASCADE"), nullable=False),
|
||||
Column("vector", Vector, nullable=False),
|
||||
Column("vector", Vector(384), nullable=False),
|
||||
)
|
||||
|
||||
# Trigram GIN index on key_skills.name.
# The ``gin_trgm_ops`` operator class is provided by the ``pg_trgm``
# extension, so that extension must be installed before this index is created.
key_skills_name_trgm_index: Final = Index(
    "ix_key_skills_name_trgm",
    key_skills_table.c.name,
    postgresql_using="gin",
    postgresql_ops={"name": "gin_trgm_ops"},
)
|
||||
|
||||
# HNSW (pgvector) index on vacancy_embedding.vector using the cosine operator
# class, i.e. the one that serves cosine-distance ordering on this column.
# ``m`` and ``ef_construction`` are HNSW build parameters; both are set above
# pgvector's defaults (16 and 64), trading build time and index size for recall.
vacancy_embedding_vector_cosine_index: Final = Index(
    "ix_vacancy_embedding_vector_cosine",
    vacancy_embedding_table.c.vector,
    postgresql_using="hnsw",
    postgresql_ops={"vector": "vector_cosine_ops"},
    postgresql_with={"m": 32, "ef_construction": 256},
)
|
||||
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from collections.abc import Sequence
|
||||
from typing import override
|
||||
|
||||
from sqlalchemy import label, select
|
||||
from sqlalchemy import label, select, text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from template_project.adapters.data_gateways.tables import vacancy_embedding_table, vacancy_table
|
||||
@@ -16,6 +16,7 @@ class DefaultVacancyDataGateway(VacancyDataGateway):
|
||||
|
||||
@override
|
||||
async def get_suitable(self, vector: list[float]) -> Sequence[SuitableVacancy]:
|
||||
await self._session.execute(text("SET LOCAL hnsw.ef_search = 128"))
|
||||
distance_expr = vacancy_embedding_table.c.vector.cosine_distance(vector)
|
||||
similarity_expr = 1 - distance_expr
|
||||
statement = (
|
||||
|
||||
@@ -0,0 +1,37 @@
|
||||
"""Add trigram and HNSW indexes and fix embedding vector length at 384
|
||||
|
||||
Revision ID: 2c99db38e99b
|
||||
Revises: 9a32674539dd
|
||||
Create Date: 2025-11-23 12:08:50.774495
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# Revision identifiers, used by Alembic to place this migration in the
# revision graph: ``revision`` is this migration's id and ``down_revision``
# is the id of the migration it is applied on top of.
revision: str = "2c99db38e99b"
down_revision: Union[str, Sequence[str], None] = "9a32674539dd"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Upgrade schema.

    Installs the ``pg_trgm`` extension, fixes both embedding columns at
    384 dimensions, then creates the trigram index on ``key_skills.name``
    and the HNSW cosine index on ``vacancy_embedding.vector``.
    """
    # gin_trgm_ops, used by ix_key_skills_name_trgm below, comes from pg_trgm.
    op.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm")
    # Declare the dimension before building the HNSW index: pgvector cannot
    # index a vector column that has no declared number of dimensions.
    op.execute("ALTER TABLE vacancy_embedding ALTER COLUMN vector TYPE vector(384)")
    op.execute("ALTER TABLE resume_embedding ALTER COLUMN vector TYPE vector(384)")
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_index(
        "ix_key_skills_name_trgm",
        "key_skills",
        ["name"],
        unique=False,
        postgresql_using="gin",
        postgresql_ops={"name": "gin_trgm_ops"},
    )
    op.create_index(
        "ix_vacancy_embedding_vector_cosine",
        "vacancy_embedding",
        ["vector"],
        unique=False,
        postgresql_using="hnsw",
        postgresql_ops={"vector": "vector_cosine_ops"},
        postgresql_with={"m": 32, "ef_construction": 256},
    )
    # ### end Alembic commands ###
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Downgrade schema.

    Drops the two indexes created by ``upgrade`` and restores both
    embedding columns to the dimensionless ``vector`` type they had in
    the previous revision.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_index(
        "ix_vacancy_embedding_vector_cosine",
        table_name="vacancy_embedding",
        postgresql_using="hnsw",
        postgresql_ops={"vector": "vector_cosine_ops"},
        postgresql_with={"m": 32, "ef_construction": 256},
    )
    op.drop_index(
        "ix_key_skills_name_trgm",
        table_name="key_skills",
        postgresql_using="gin",
        postgresql_ops={"name": "gin_trgm_ops"},
    )
    # ### end Alembic commands ###
    # Undo the vector(384) type change from upgrade(). This runs after the
    # HNSW index is dropped, because that index needs a fixed dimension.
    op.execute("ALTER TABLE resume_embedding ALTER COLUMN vector TYPE vector")
    op.execute("ALTER TABLE vacancy_embedding ALTER COLUMN vector TYPE vector")
    # NOTE: pg_trgm is intentionally left installed. upgrade() uses
    # IF NOT EXISTS, so the extension may predate this revision and other
    # database objects may depend on it.
|
||||
Reference in New Issue
Block a user