diff --git a/infrastructure/configs/postgres/init-db.sql b/infrastructure/configs/postgres/init-db.sql
index f3183a7..745daae 100644
--- a/infrastructure/configs/postgres/init-db.sql
+++ b/infrastructure/configs/postgres/init-db.sql
@@ -3,3 +3,4 @@ CREATE DATABASE app;
 
 \c app;
 CREATE EXTENSION IF NOT EXISTS vector;
+CREATE EXTENSION IF NOT EXISTS pg_trgm;  -- supplies gin_trgm_ops, used by ix_key_skills_name_trgm
diff --git a/src/template_project/adapters/data_gateways/tables.py b/src/template_project/adapters/data_gateways/tables.py
index 5adf4de..a65e356 100644
--- a/src/template_project/adapters/data_gateways/tables.py
+++ b/src/template_project/adapters/data_gateways/tables.py
@@ -8,6 +8,7 @@ from sqlalchemy import (
     DateTime,
     Enum,
     ForeignKey,
+    Index,
     Integer,
     MetaData,
     Numeric,
@@ -178,7 +179,7 @@ resume_embedding_table: Final = Table(
     Column("deleted_at", DateTime(timezone=True)),
     Column("created_at", DateTime(timezone=True), nullable=False),
     Column("resume_id", UUID, ForeignKey("resume.id", ondelete="CASCADE"), nullable=False),
-    Column("vector", Vector, nullable=False),
+    Column("vector", Vector(384), nullable=False),  # dimension pinned to 384, same as vacancy_embedding.vector
 )
 resume_prediction_table: Final = Table(
     "resume_prediction",
@@ -255,7 +256,22 @@ vacancy_embedding_table: Final = Table(
     Column("deleted_at", DateTime(timezone=True)),
     Column("created_at", DateTime(timezone=True), nullable=False),
     Column("vacancy_id", UUID, ForeignKey("vacancy.id", ondelete="CASCADE"), nullable=False),
-    Column("vector", Vector, nullable=False),
+    Column("vector", Vector(384), nullable=False),  # fixed dimension; the HNSW index below is built on this column
+)
+
+key_skills_name_trgm_index: Final = Index(  # GIN trigram index on key_skills.name
+    "ix_key_skills_name_trgm",
+    key_skills_table.c.name,
+    postgresql_using="gin",
+    postgresql_ops={"name": "gin_trgm_ops"},  # operator class from the pg_trgm extension
+)
+
+vacancy_embedding_vector_cosine_index: Final = Index(  # pgvector HNSW index on vacancy_embedding.vector
+    "ix_vacancy_embedding_vector_cosine",
+    vacancy_embedding_table.c.vector,
+    postgresql_using="hnsw",
+    postgresql_ops={"vector": "vector_cosine_ops"},  # cosine operator class, matching the cosine_distance() query in get_suitable
+    postgresql_with={"m": 32, "ef_construction": 256},  # HNSW build-time parameters; keep in sync with migration 2c99db38e99b
 )
 
 
diff --git 
a/src/template_project/adapters/data_gateways/vacancy.py b/src/template_project/adapters/data_gateways/vacancy.py
index d6cada6..d9de454 100644
--- a/src/template_project/adapters/data_gateways/vacancy.py
+++ b/src/template_project/adapters/data_gateways/vacancy.py
@@ -1,7 +1,7 @@
 from collections.abc import Sequence
 from typing import override
 
-from sqlalchemy import label, select
+from sqlalchemy import label, select, text
 from sqlalchemy.ext.asyncio import AsyncSession
 
 from template_project.adapters.data_gateways.tables import vacancy_embedding_table, vacancy_table
@@ -16,6 +16,7 @@ class DefaultVacancyDataGateway(VacancyDataGateway):
 
     @override
     async def get_suitable(self, vector: list[float]) -> Sequence[SuitableVacancy]:
+        await self._session.execute(text("SET LOCAL hnsw.ef_search = 128"))
         distance_expr = vacancy_embedding_table.c.vector.cosine_distance(vector)
         similarity_expr = 1 - distance_expr
         statement = (
diff --git a/src/template_project/migrations/versions/2c99db38e99b_.py b/src/template_project/migrations/versions/2c99db38e99b_.py
new file mode 100644
index 0000000..b5e8540
--- /dev/null
+++ b/src/template_project/migrations/versions/2c99db38e99b_.py
@@ -0,0 +1,44 @@
+"""Add trigram and HNSW indexes; pin embedding vectors to 384 dimensions.
+
+Revision ID: 2c99db38e99b
+Revises: 9a32674539dd
+Create Date: 2025-11-23 12:08:50.774495
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision: str = '2c99db38e99b'
+down_revision: Union[str, Sequence[str], None] = '9a32674539dd'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Install pg_trgm, pin both embedding columns to vector(384), add the indexes."""
+    # gin_trgm_ops (used by ix_key_skills_name_trgm) is provided by pg_trgm.
+    op.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm")
+    # Give the columns a fixed dimension before the HNSW index is built on one of them.
+    op.execute("ALTER TABLE vacancy_embedding ALTER COLUMN vector TYPE vector(384)")
+    op.execute("ALTER TABLE resume_embedding ALTER COLUMN vector TYPE vector(384)")
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_index('ix_key_skills_name_trgm', 'key_skills', ['name'], unique=False, postgresql_using='gin', postgresql_ops={'name': 'gin_trgm_ops'})
+    op.create_index('ix_vacancy_embedding_vector_cosine', 'vacancy_embedding', ['vector'], unique=False, postgresql_using='hnsw', postgresql_ops={'vector': 'vector_cosine_ops'}, postgresql_with={'m': 32, 'ef_construction': 256})
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    """Drop the indexes and restore the unconstrained vector column type."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_index('ix_vacancy_embedding_vector_cosine', table_name='vacancy_embedding', postgresql_using='hnsw', postgresql_ops={'vector': 'vector_cosine_ops'}, postgresql_with={'m': 32, 'ef_construction': 256})
+    op.drop_index('ix_key_skills_name_trgm', table_name='key_skills', postgresql_using='gin', postgresql_ops={'name': 'gin_trgm_ops'})
+    # ### end Alembic commands ###
+    # Mirror upgrade(): remove the 384-dimension constraint once the HNSW index is gone.
+    op.execute("ALTER TABLE resume_embedding ALTER COLUMN vector TYPE vector")
+    op.execute("ALTER TABLE vacancy_embedding ALTER COLUMN vector TYPE vector")
+    # pg_trgm stays installed: init-db.sql creates it as well, and dropping an
+    # extension could break objects outside this migration.