You've already forked RekomenciBackend
feat: optimize queries by adding indexes, defining the vector length, and setting the local hnsw.ef_search parameter
This commit is contained in:
@@ -3,3 +3,4 @@ CREATE DATABASE app;
|
|||||||
\c app;
|
\c app;
|
||||||
|
|
||||||
CREATE EXTENSION IF NOT EXISTS vector;
|
CREATE EXTENSION IF NOT EXISTS vector;
|
||||||
|
CREATE EXTENSION IF NOT EXISTS pg_trgm;
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ from sqlalchemy import (
|
|||||||
DateTime,
|
DateTime,
|
||||||
Enum,
|
Enum,
|
||||||
ForeignKey,
|
ForeignKey,
|
||||||
|
Index,
|
||||||
Integer,
|
Integer,
|
||||||
MetaData,
|
MetaData,
|
||||||
Numeric,
|
Numeric,
|
||||||
@@ -178,7 +179,7 @@ resume_embedding_table: Final = Table(
|
|||||||
Column("deleted_at", DateTime(timezone=True)),
|
Column("deleted_at", DateTime(timezone=True)),
|
||||||
Column("created_at", DateTime(timezone=True), nullable=False),
|
Column("created_at", DateTime(timezone=True), nullable=False),
|
||||||
Column("resume_id", UUID, ForeignKey("resume.id", ondelete="CASCADE"), nullable=False),
|
Column("resume_id", UUID, ForeignKey("resume.id", ondelete="CASCADE"), nullable=False),
|
||||||
Column("vector", Vector, nullable=False),
|
Column("vector", Vector(384), nullable=False),
|
||||||
)
|
)
|
||||||
resume_prediction_table: Final = Table(
|
resume_prediction_table: Final = Table(
|
||||||
"resume_prediction",
|
"resume_prediction",
|
||||||
@@ -255,7 +256,22 @@ vacancy_embedding_table: Final = Table(
|
|||||||
Column("deleted_at", DateTime(timezone=True)),
|
Column("deleted_at", DateTime(timezone=True)),
|
||||||
Column("created_at", DateTime(timezone=True), nullable=False),
|
Column("created_at", DateTime(timezone=True), nullable=False),
|
||||||
Column("vacancy_id", UUID, ForeignKey("vacancy.id", ondelete="CASCADE"), nullable=False),
|
Column("vacancy_id", UUID, ForeignKey("vacancy.id", ondelete="CASCADE"), nullable=False),
|
||||||
Column("vector", Vector, nullable=False),
|
Column("vector", Vector(384), nullable=False),
|
||||||
|
)
|
||||||
|
|
||||||
|
# GIN index over key_skills.name using the pg_trgm operator class
# (gin_trgm_ops); the pg_trgm extension must be installed in the database.
key_skills_name_trgm_index: Final = Index(
    "ix_key_skills_name_trgm",
    key_skills_table.c.name,
    postgresql_ops={"name": "gin_trgm_ops"},
    postgresql_using="gin",
)
|
||||||
|
|
||||||
|
# HNSW index over vacancy_embedding.vector built with the cosine operator
# class (vector_cosine_ops). The build parameters m / ef_construction are
# passed through to the index's WITH (...) storage options.
vacancy_embedding_vector_cosine_index: Final = Index(
    "ix_vacancy_embedding_vector_cosine",
    vacancy_embedding_table.c.vector,
    postgresql_with={"m": 32, "ef_construction": 256},
    postgresql_ops={"vector": "vector_cosine_ops"},
    postgresql_using="hnsw",
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from collections.abc import Sequence
|
from collections.abc import Sequence
|
||||||
from typing import override
|
from typing import override
|
||||||
|
|
||||||
from sqlalchemy import label, select
|
from sqlalchemy import label, select, text
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from template_project.adapters.data_gateways.tables import vacancy_embedding_table, vacancy_table
|
from template_project.adapters.data_gateways.tables import vacancy_embedding_table, vacancy_table
|
||||||
@@ -16,6 +16,7 @@ class DefaultVacancyDataGateway(VacancyDataGateway):
|
|||||||
|
|
||||||
@override
|
@override
|
||||||
async def get_suitable(self, vector: list[float]) -> Sequence[SuitableVacancy]:
|
async def get_suitable(self, vector: list[float]) -> Sequence[SuitableVacancy]:
|
||||||
|
await self._session.execute(text("SET LOCAL hnsw.ef_search = 128"))
|
||||||
distance_expr = vacancy_embedding_table.c.vector.cosine_distance(vector)
|
distance_expr = vacancy_embedding_table.c.vector.cosine_distance(vector)
|
||||||
similarity_expr = 1 - distance_expr
|
similarity_expr = 1 - distance_expr
|
||||||
statement = (
|
statement = (
|
||||||
|
|||||||
@@ -0,0 +1,37 @@
|
|||||||
|
"""Add pg_trgm and HNSW indexes and fix embedding vectors at 384 dimensions
|
||||||
|
|
||||||
|
Revision ID: 2c99db38e99b
|
||||||
|
Revises: 9a32674539dd
|
||||||
|
Create Date: 2025-11-23 12:08:50.774495
|
||||||
|
|
||||||
|
"""
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision: str = '2c99db38e99b'
|
||||||
|
down_revision: Union[str, Sequence[str], None] = '9a32674539dd'
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Apply this revision.

    Steps, in order:
      1. make sure the ``pg_trgm`` extension is installed;
      2. pin both embedding columns to ``vector(384)``;
      3. create the trigram GIN index on ``key_skills.name`` and the HNSW
         cosine index on ``vacancy_embedding.vector``.
    """
    # The extension has to exist before gin_trgm_ops can be referenced below.
    op.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm")

    # The columns get an explicit dimension before the HNSW index is built.
    op.execute("ALTER TABLE vacancy_embedding ALTER COLUMN vector TYPE vector(384)")
    op.execute("ALTER TABLE resume_embedding ALTER COLUMN vector TYPE vector(384)")

    # ### commands auto generated by Alembic - please adjust! ###
    op.create_index(
        "ix_key_skills_name_trgm",
        "key_skills",
        ["name"],
        unique=False,
        postgresql_using="gin",
        postgresql_ops={"name": "gin_trgm_ops"},
    )
    op.create_index(
        "ix_vacancy_embedding_vector_cosine",
        "vacancy_embedding",
        ["vector"],
        unique=False,
        postgresql_using="hnsw",
        postgresql_ops={"vector": "vector_cosine_ops"},
        postgresql_with={"m": 32, "ef_construction": 256},
    )
    # ### end Alembic commands ###
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Revert this revision.

    Undoes ``upgrade()`` in reverse order: both indexes are dropped first,
    then the embedding columns are relaxed from ``vector(384)`` back to an
    unconstrained ``vector``.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_index(
        'ix_vacancy_embedding_vector_cosine',
        table_name='vacancy_embedding',
        postgresql_using='hnsw',
        postgresql_ops={'vector': 'vector_cosine_ops'},
        postgresql_with={'m': 32, 'ef_construction': 256},
    )
    op.drop_index(
        'ix_key_skills_name_trgm',
        table_name='key_skills',
        postgresql_using='gin',
        postgresql_ops={'name': 'gin_trgm_ops'},
    )
    # ### end Alembic commands ###

    # upgrade() pinned these columns to vector(384); restore the previous
    # dimension-less type so the schema matches the prior revision.
    # NOTE(review): assumes the prior revision declared a plain ``vector``
    # column (no dimension) — confirm against revision 9a32674539dd.
    op.execute("ALTER TABLE resume_embedding ALTER COLUMN vector TYPE vector")
    op.execute("ALTER TABLE vacancy_embedding ALTER COLUMN vector TYPE vector")

    # pg_trgm is intentionally left installed: upgrade() only creates it
    # "IF NOT EXISTS", so it may predate this revision or be used elsewhere.
|
||||||
Reference in New Issue
Block a user